Data Sources and Variables

Summary

caaspp_scores_path <- "data/sb_ca2023_1_csv_v1.txt"
caaspp <- read_delim(caaspp_scores_path, delim="^")
## Rows: 102489 Columns: 33
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "^"
## chr (27): County Code, District Code, School Code, Test Type, Total Tested a...
## dbl  (5): Test Year, Student Group ID, Grade, Test ID, Type ID
## lgl  (1): Filler
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
colnames(caaspp) <- c(
  "county_code",
  "district_code",
  "school_code",
  "filler",
  "test_year",
  "student_group_id",
  "test_type",
  "total_tested_reporting_level",
  "total_tested_with_scores",
  "grade",
  "test_id",
  "students_enrolled",
  "students_tested",
  "mean_scale_score",
  "pct_standard_exceeded",
  "pct_standard_met",
  "pct_standard_met_and_above",
  "pct_standard_nearly_met",
  "pct_standard_not_met",
  "students_with_scores",
  "area1_pct_above_standard",
  "area1_pct_near_standard",
  "area1_pct_below_standard",
  "area2_pct_above_standard",
  "area2_pct_near_standard",
  "area2_pct_below_standard",
  "area3_pct_above_standard",
  "area3_pct_near_standard",
  "area3_pct_below_standard",
  "area4_pct_above_standard",
  "area4_pct_near_standard",
  "area4_pct_below_standard",
  "type_id"
)

lausd_caaspp <- caaspp %>%
    filter(district_code=="64733") %>%
    inner_join(frpm_lausd, by="school_code") %>%
  filter(student_group_id == 1) %>%  # All Students only
  mutate(
    test_subject = case_when(
      test_id == 1 ~ "ELA",
      test_id == 2 ~ "Math",
      TRUE ~ NA_character_
    )
  ) %>%
  filter(!is.na(test_subject))  # Keep only ELA and Math

# 2. Convert test scores to numeric (some may be "*")
lausd_caaspp_numeric <- lausd_caaspp %>%
  mutate(across(
    c(mean_scale_score,
      pct_standard_met_and_above,
      pct_standard_not_met),
    ~ as.numeric(na_if(., "*"))
  ))

# 3. Aggregate to school-subject-year level
school_scores <- lausd_caaspp_numeric %>%
  group_by(school_code, test_year, test_subject) %>%
  summarise(
    avg_pct_met_above = mean(pct_standard_met_and_above, na.rm = TRUE),
    avg_pct_not_met   = mean(pct_standard_not_met, na.rm = TRUE),
    avg_scale_score   = mean(mean_scale_score, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  pivot_wider(
    names_from = test_subject,
    values_from = c(avg_pct_met_above, avg_pct_not_met, avg_scale_score),
    names_glue = "{.value}_{test_subject}"
  )


glimpse(school_scores)
## Rows: 969
## Columns: 8
## $ school_code            <chr> "0100289", "0100669", "0100677", "0100743", "01…
## $ test_year              <dbl> 2023, 2023, 2023, 2023, 2023, 2023, 2023, 2023,…
## $ avg_pct_met_above_ELA  <dbl> 28.42250, 34.91800, 64.20000, 43.83400, 45.7400…
## $ avg_pct_met_above_Math <dbl> 28.4725, 14.6680, 34.5700, 31.5940, 26.8800, 27…
## $ avg_pct_not_met_ELA    <dbl> 47.72000, 42.64000, 17.28000, 26.84200, 29.7900…
## $ avg_pct_not_met_Math   <dbl> 38.26500, 60.04200, 37.04000, 37.50200, 55.9100…
## $ avg_scale_score_ELA    <dbl> 2409.033, 2492.700, 2610.000, 2473.325, 2565.90…
## $ avg_scale_score_Math   <dbl> 2438.567, 2457.350, 2582.900, 2459.500, 2536.60…
  • Final Data Set is merged on school code

All RDD Plots

## chronic_absenteeism ~ frpm_percent @ 35

Estimate (tau): 6.478
SE: 4.216
95% CI: [-1.784, 14.741]
p-value: 0.124

Call: rdplot

Number of Obs. 1001 Kernel Uniform

Number of Obs. 68 933 Eff. Number of Obs. 68 933 Order poly. fit (p) 4 4 BW poly. fit (h) 35.000 65.000 Number of bins scale 1.000 1.000

## btb ~ frpm_percent @ 35

Estimate (tau): -0.127
SE: 0.162
95% CI: [-0.445, 0.190]
p-value: 0.432

Call: rdplot

Number of Obs. 1001 Kernel Uniform

Number of Obs. 68 933 Eff. Number of Obs. 68 933 Order poly. fit (p) 4 4 BW poly. fit (h) 35.000 65.000 Number of bins scale 1.000 1.000

## avg_pct_met_above_ELA ~ frpm_percent @ 35

Estimate (tau): -12.850
SE: 6.001
95% CI: [-24.612, -1.088]
p-value: 0.032

Call: rdplot

Number of Obs. 960 Kernel Uniform

Number of Obs. 66 894 Eff. Number of Obs. 66 894 Order poly. fit (p) 4 4 BW poly. fit (h) 35.000 65.000 Number of bins scale 1.000 1.000

## avg_pct_met_above_Math ~ frpm_percent @ 35

Estimate (tau): -15.069
SE: 8.699
95% CI: [-32.120, 1.981]
p-value: 0.083

Call: rdplot

Number of Obs. 960 Kernel Uniform

Number of Obs. 66 894 Eff. Number of Obs. 66 894 Order poly. fit (p) 4 4 BW poly. fit (h) 35.000 65.000 Number of bins scale 1.000 1.000

## avg_pct_not_met_ELA ~ frpm_percent @ 35

Estimate (tau): 9.376
SE: 4.539
95% CI: [0.480, 18.271]
p-value: 0.039

Call: rdplot

Number of Obs. 960 Kernel Uniform

Number of Obs. 66 894 Eff. Number of Obs. 66 894 Order poly. fit (p) 4 4 BW poly. fit (h) 35.000 65.000 Number of bins scale 1.000 1.000

## avg_pct_not_met_Math ~ frpm_percent @ 35

Estimate (tau): 11.277
SE: 5.869
95% CI: [-0.226, 22.780]
p-value: 0.055

Call: rdplot

Number of Obs. 960 Kernel Uniform

Number of Obs. 66 894 Eff. Number of Obs. 66 894 Order poly. fit (p) 4 4 BW poly. fit (h) 35.000 65.000 Number of bins scale 1.000 1.000

## avg_scale_score_ELA ~ frpm_percent @ 35

Estimate (tau): -24.406
SE: 25.897
95% CI: [-75.164, 26.351]
p-value: 0.346

Call: rdplot

Number of Obs. 958 Kernel Uniform

Number of Obs. 66 892 Eff. Number of Obs. 66 892 Order poly. fit (p) 4 4 BW poly. fit (h) 35.000 65.000 Number of bins scale 1.000 1.000

## avg_scale_score_Math ~ frpm_percent @ 35

Estimate (tau): -32.251
SE: 25.797
95% CI: [-82.812, 18.310]
p-value: 0.211

Call: rdplot

Number of Obs. 958 Kernel Uniform

Number of Obs. 66 892 Eff. Number of Obs. 66 892 Order poly. fit (p) 4 4 BW poly. fit (h) 35.000 65.000 Number of bins scale 1.000 1.000

## chronic_absenteeism ~ frpm_percent @ 75

Estimate (tau): 1.488
SE: 4.687
95% CI: [-7.698, 10.674]
p-value: 0.751

Call: rdplot

Number of Obs. 1001 Kernel Uniform

Number of Obs. 235 766 Eff. Number of Obs. 235 766 Order poly. fit (p) 4 4 BW poly. fit (h) 75.000 25.000 Number of bins scale 1.000 1.000

## btb ~ frpm_percent @ 75

Estimate (tau): -0.319
SE: 0.173
95% CI: [-0.658, 0.020]
p-value: 0.065

Call: rdplot

Number of Obs. 1001 Kernel Uniform

Number of Obs. 235 766 Eff. Number of Obs. 235 766 Order poly. fit (p) 4 4 BW poly. fit (h) 75.000 25.000 Number of bins scale 1.000 1.000

## avg_pct_met_above_ELA ~ frpm_percent @ 75

Estimate (tau): 8.095
SE: 4.953
95% CI: [-1.612, 17.802]
p-value: 0.102

Call: rdplot

Number of Obs. 960 Kernel Uniform

Number of Obs. 227 733 Eff. Number of Obs. 227 733 Order poly. fit (p) 4 4 BW poly. fit (h) 75.000 25.000 Number of bins scale 1.000 1.000

## avg_pct_met_above_Math ~ frpm_percent @ 75

Estimate (tau): 3.355
SE: 6.315
95% CI: [-9.022, 15.731]
p-value: 0.595

Call: rdplot

Number of Obs. 960 Kernel Uniform

Number of Obs. 227 733 Eff. Number of Obs. 227 733 Order poly. fit (p) 4 4 BW poly. fit (h) 75.000 25.000 Number of bins scale 1.000 1.000

## avg_pct_not_met_ELA ~ frpm_percent @ 75

Estimate (tau): -11.123
SE: 4.753
95% CI: [-20.438, -1.808]
p-value: 0.019

Call: rdplot

Number of Obs. 960 Kernel Uniform

Number of Obs. 227 733 Eff. Number of Obs. 227 733 Order poly. fit (p) 4 4 BW poly. fit (h) 75.000 25.000 Number of bins scale 1.000 1.000

## avg_pct_not_met_Math ~ frpm_percent @ 75

Estimate (tau): -8.667
SE: 7.271
95% CI: [-22.918, 5.584]
p-value: 0.233

Call: rdplot

Number of Obs. 960 Kernel Uniform

Number of Obs. 227 733 Eff. Number of Obs. 227 733 Order poly. fit (p) 4 4 BW poly. fit (h) 75.000 25.000 Number of bins scale 1.000 1.000

## avg_scale_score_ELA ~ frpm_percent @ 75

Estimate (tau): 32.740
SE: 17.269
95% CI: [-1.106, 66.586]
p-value: 0.058

Call: rdplot

Number of Obs. 958 Kernel Uniform

Number of Obs. 227 731 Eff. Number of Obs. 227 731 Order poly. fit (p) 4 4 BW poly. fit (h) 75.000 25.000 Number of bins scale 1.000 1.000

## avg_scale_score_Math ~ frpm_percent @ 75

Estimate (tau): 22.756
SE: 12.822
95% CI: [-2.375, 47.888]
p-value: 0.076

Call: rdplot

Number of Obs. 958 Kernel Uniform

Number of Obs. 227 731 Eff. Number of Obs. 227 731 Order poly. fit (p) 4 4 BW poly. fit (h) 75.000 25.000 Number of bins scale 1.000 1.000

## chronic_absenteeism ~ undup_pct @ 55

Estimate (tau): -0.188
SE: 3.485
95% CI: [-7.019, 6.643]
p-value: 0.957

Call: rdplot

Number of Obs. 1001 Kernel Uniform

Number of Obs. 114 887 Eff. Number of Obs. 114 887 Order poly. fit (p) 4 4 BW poly. fit (h) 55.000 45.000 Number of bins scale 1.000 1.000

## btb ~ undup_pct @ 55

Estimate (tau): 0.110
SE: 0.224
95% CI: [-0.330, 0.550]
p-value: 0.623

Call: rdplot

Number of Obs. 1001 Kernel Uniform

Number of Obs. 114 887 Eff. Number of Obs. 114 887 Order poly. fit (p) 4 4 BW poly. fit (h) 55.000 45.000 Number of bins scale 1.000 1.000

## avg_pct_met_above_ELA ~ undup_pct @ 55

Estimate (tau): -0.753
SE: 4.683
95% CI: [-9.932, 8.426]
p-value: 0.872

Call: rdplot

Number of Obs. 960 Kernel Uniform

Number of Obs. 112 848 Eff. Number of Obs. 112 848 Order poly. fit (p) 4 4 BW poly. fit (h) 55.000 45.000 Number of bins scale 1.000 1.000

## avg_pct_met_above_Math ~ undup_pct @ 55

Estimate (tau): 4.277
SE: 7.507
95% CI: [-10.437, 18.990]
p-value: 0.569

Call: rdplot

Number of Obs. 960 Kernel Uniform

Number of Obs. 112 848 Eff. Number of Obs. 112 848 Order poly. fit (p) 4 4 BW poly. fit (h) 55.000 45.000 Number of bins scale 1.000 1.000

## avg_pct_not_met_ELA ~ undup_pct @ 55

Estimate (tau): 2.401
SE: 3.469
95% CI: [-4.399, 9.201]
p-value: 0.489

Call: rdplot

Number of Obs. 960 Kernel Uniform

Number of Obs. 112 848 Eff. Number of Obs. 112 848 Order poly. fit (p) 4 4 BW poly. fit (h) 55.000 45.000 Number of bins scale 1.000 1.000

## avg_pct_not_met_Math ~ undup_pct @ 55

Estimate (tau): -2.151
SE: 6.753
95% CI: [-15.386, 11.084]
p-value: 0.750

Call: rdplot

Number of Obs. 960 Kernel Uniform

Number of Obs. 112 848 Eff. Number of Obs. 112 848 Order poly. fit (p) 4 4 BW poly. fit (h) 55.000 45.000 Number of bins scale 1.000 1.000

## avg_scale_score_ELA ~ undup_pct @ 55

Estimate (tau): 13.012
SE: 28.723
95% CI: [-43.284, 69.307]
p-value: 0.651

Call: rdplot

Number of Obs. 958 Kernel Uniform

Number of Obs. 112 846 Eff. Number of Obs. 112 846 Order poly. fit (p) 4 4 BW poly. fit (h) 55.000 45.000 Number of bins scale 1.000 1.000

## avg_scale_score_Math ~ undup_pct @ 55

Estimate (tau): 2.480
SE: 18.709
95% CI: [-34.189, 39.148]
p-value: 0.895

Call: rdplot

Number of Obs. 958 Kernel Uniform

Number of Obs. 112 846 Eff. Number of Obs. 112 846 Order poly. fit (p) 4 4 BW poly. fit (h) 55.000 45.000 Number of bins scale 1.000 1.000

## chronic_absenteeism ~ undup_pct @ 75

Estimate (tau): -1.684
SE: 4.928
95% CI: [-11.342, 7.974]
p-value: 0.733

Call: rdplot

Number of Obs. 1001 Kernel Uniform

Number of Obs. 214 787 Eff. Number of Obs. 214 787 Order poly. fit (p) 4 4 BW poly. fit (h) 75.000 25.000 Number of bins scale 1.000 1.000

## btb ~ undup_pct @ 75

Estimate (tau): -0.007
SE: 0.174
95% CI: [-0.348, 0.334]
p-value: 0.970

Call: rdplot

Number of Obs. 1001 Kernel Uniform

Number of Obs. 214 787 Eff. Number of Obs. 214 787 Order poly. fit (p) 4 4 BW poly. fit (h) 75.000 25.000 Number of bins scale 1.000 1.000

## avg_pct_met_above_ELA ~ undup_pct @ 75

Estimate (tau): 2.113
SE: 4.545
95% CI: [-6.795, 11.021]
p-value: 0.642

Call: rdplot

Number of Obs. 960 Kernel Uniform

Number of Obs. 207 753 Eff. Number of Obs. 207 753 Order poly. fit (p) 4 4 BW poly. fit (h) 75.000 25.000 Number of bins scale 1.000 1.000

## avg_pct_met_above_Math ~ undup_pct @ 75

Estimate (tau): 4.140
SE: 5.419
95% CI: [-6.481, 14.760]
p-value: 0.445

Call: rdplot

Number of Obs. 960 Kernel Uniform

Number of Obs. 207 753 Eff. Number of Obs. 207 753 Order poly. fit (p) 4 4 BW poly. fit (h) 75.000 25.000 Number of bins scale 1.000 1.000

## avg_pct_not_met_ELA ~ undup_pct @ 75

Estimate (tau): -2.009
SE: 4.585
95% CI: [-10.996, 6.978]
p-value: 0.661

Call: rdplot

Number of Obs. 960 Kernel Uniform

Number of Obs. 207 753 Eff. Number of Obs. 207 753 Order poly. fit (p) 4 4 BW poly. fit (h) 75.000 25.000 Number of bins scale 1.000 1.000

## avg_pct_not_met_Math ~ undup_pct @ 75

Estimate (tau): -3.593
SE: 5.798
95% CI: [-14.957, 7.772]
p-value: 0.536

Call: rdplot

Number of Obs. 960 Kernel Uniform

Number of Obs. 207 753 Eff. Number of Obs. 207 753 Order poly. fit (p) 4 4 BW poly. fit (h) 75.000 25.000 Number of bins scale 1.000 1.000

## avg_scale_score_ELA ~ undup_pct @ 75

Estimate (tau): 2.250
SE: 23.602
95% CI: [-44.009, 48.510]
p-value: 0.924

Call: rdplot

Number of Obs. 958 Kernel Uniform

Number of Obs. 207 751 Eff. Number of Obs. 207 751 Order poly. fit (p) 4 4 BW poly. fit (h) 75.000 25.000 Number of bins scale 1.000 1.000

## avg_scale_score_Math ~ undup_pct @ 75

Estimate (tau): 3.898
SE: 17.315
95% CI: [-30.038, 37.834]
p-value: 0.822

Call: rdplot

Number of Obs. 958 Kernel Uniform

Number of Obs. 207 751 Eff. Number of Obs. 207 751 Order poly. fit (p) 4 4 BW poly. fit (h) 75.000 25.000 Number of bins scale 1.000 1.000

A tibble: 32 × 8

outcome running_variable cutoff tau se ci_lower ci_upper p_value 1 chronic_ab… frpm_percent 35 6.48e+0 4.22 -1.78 14.7 0.124 2 btb frpm_percent 35 -1.27e-1 0.162 -0.445 0.190 0.432 3 avg_pct_me… frpm_percent 35 -1.29e+1 6.00 -24.6 -1.09 0.0323 4 avg_pct_me… frpm_percent 35 -1.51e+1 8.70 -32.1 1.98 0.0832 5 avg_pct_no… frpm_percent 35 9.38e+0 4.54 0.480 18.3 0.0389 6 avg_pct_no… frpm_percent 35 1.13e+1 5.87 -0.226 22.8 0.0547 7 avg_scale_… frpm_percent 35 -2.44e+1 25.9 -75.2 26.4 0.346 8 avg_scale_… frpm_percent 35 -3.23e+1 25.8 -82.8 18.3 0.211 9 chronic_ab… frpm_percent 75 1.49e+0 4.69 -7.70 10.7 0.751 10 btb frpm_percent 75 -3.19e-1 0.173 -0.658 0.0202 0.0653 11 avg_pct_me… frpm_percent 75 8.10e+0 4.95 -1.61 17.8 0.102 12 avg_pct_me… frpm_percent 75 3.35e+0 6.31 -9.02 15.7 0.595 13 avg_pct_no… frpm_percent 75 -1.11e+1 4.75 -20.4 -1.81 0.0193 14 avg_pct_no… frpm_percent 75 -8.67e+0 7.27 -22.9 5.58 0.233 15 avg_scale_… frpm_percent 75 3.27e+1 17.3 -1.11 66.6 0.0580 16 avg_scale_… frpm_percent 75 2.28e+1 12.8 -2.38 47.9 0.0759 17 chronic_ab… undup_pct 55 -1.88e-1 3.49 -7.02 6.64 0.957 18 btb undup_pct 55 1.10e-1 0.224 -0.330 0.550 0.623 19 avg_pct_me… undup_pct 55 -7.53e-1 4.68 -9.93 8.43 0.872 20 avg_pct_me… undup_pct 55 4.28e+0 7.51 -10.4 19.0 0.569 21 avg_pct_no… undup_pct 55 2.40e+0 3.47 -4.40 9.20 0.489 22 avg_pct_no… undup_pct 55 -2.15e+0 6.75 -15.4 11.1 0.750 23 avg_scale_… undup_pct 55 1.30e+1 28.7 -43.3 69.3 0.651 24 avg_scale_… undup_pct 55 2.48e+0 18.7 -34.2 39.1 0.895 25 chronic_ab… undup_pct 75 -1.68e+0 4.93 -11.3 7.97 0.733 26 btb undup_pct 75 -6.64e-3 0.174 -0.348 0.334 0.970 27 avg_pct_me… undup_pct 75 2.11e+0 4.54 -6.80 11.0 0.642 28 avg_pct_me… undup_pct 75 4.14e+0 5.42 -6.48 14.8 0.445 29 avg_pct_no… undup_pct 75 -2.01e+0 4.59 -11.0 6.98 0.661 30 avg_pct_no… undup_pct 75 -3.59e+0 5.80 -15.0 7.77 0.536 31 avg_scale_… undup_pct 75 2.25e+0 23.6 -44.0 48.5 0.924 32 avg_scale_… undup_pct 75 3.90e+0 17.3 -30.0 37.8 0.822

GP RDD - chronic_abseentism ~ FRPM % - 35 cut off

######################################################
# GP RDD
# chronic_abseentism ~ FRPM % - 35 cut off
######################################################
rdd_res_absenteeism_frpm_35_cutoff <- gp_rdd(
  df_clean$frpm_percent,
  df_clean$chronic_absenteeism,
  35
)
rdd_res_absenteeism_frpm_35_cutoff$tau     # estimated effect
## [1] 7.865098
rdd_res_absenteeism_frpm_35_cutoff$se      # standard error
## [1] 5.739285
rdd_res_absenteeism_frpm_35_cutoff$ci      # confidence interval
##     lower     upper 
## -3.383694 19.113891
rdd_result_plot_1 <- gp_rdd_plot(rdd_res_absenteeism_frpm_35_cutoff) +
  geom_vline(xintercept = 35, linetype = "dashed") +
  coord_cartesian(xlim = c(20, 60)) +
  labs(title = "Zoomed-In View Around the Cutoff")
print(rdd_result_plot_1)

GP RDD - chronic_abseentism ~ FRPM % - 75 cut off

######################################################
# GP RDD
# chronic_abseentism ~ FRPM % - 75 cut off
######################################################
# Example using formula interface:
rdd_res_absenteeism_frpm_75_cutoff <- gp_rdd(
  df_clean$frpm_percent,
  df_clean$chronic_absenteeism,
  75
)
rdd_res_absenteeism_frpm_75_cutoff$tau     # estimated effect
## [1] -0.07402481
rdd_res_absenteeism_frpm_75_cutoff$se      # standard error
## [1] 6.059793
rdd_res_absenteeism_frpm_75_cutoff$ci      # confidence interval
##     lower     upper 
## -11.95100  11.80295
rdd_result_plot_2 <- gp_rdd_plot(rdd_res_absenteeism_frpm_75_cutoff) +
  geom_vline(xintercept = 35, linetype = "dashed") +
  coord_cartesian(xlim = c(20, 60)) +
  labs(title = "Zoomed-In View Around the Cutoff")
print(rdd_result_plot_2)

GP RDD - BTB ~ FRPM % - 35 cut off

######################################################
# GP RDD
# BTB ~ FRPM % - 35 cut off
######################################################
rdd_res_absenteeism_BTB_35_cutoff <- gp_rdd(
  df_clean$frpm_percent,
  df_clean$btb,
  35
)
rdd_res_absenteeism_BTB_35_cutoff$tau     # estimated effect
## [1] -0.2305462
rdd_res_absenteeism_BTB_35_cutoff$se      # standard error
## [1] 0.2237574
rdd_res_absenteeism_BTB_35_cutoff$ci      # confidence interval
##      lower      upper 
## -0.6691026  0.2080103
rdd_result_plot_3 <- gp_rdd_plot(rdd_res_absenteeism_BTB_35_cutoff) +
  geom_vline(xintercept = 35, linetype = "dashed") +
  coord_cartesian(xlim = c(20, 60)) +
  labs(title = "Zoomed-In View Around the Cutoff")
print(rdd_result_plot_3)

GP RDD - BTB ~ FRPM % - 75 cut off

######################################################
# GP RDD
# BTB ~ FRPM % - 75 cut off
######################################################
rdd_res_absenteeism_BTB_75_cutoff <- gp_rdd(
  df_clean$frpm_percent,
  df_clean$btb,
  75
)
rdd_res_absenteeism_BTB_75_cutoff$tau     # estimated effect
## [1] -0.1589799
rdd_res_absenteeism_BTB_75_cutoff$se      # standard error
## [1] 0.2079251
rdd_res_absenteeism_BTB_75_cutoff$ci      # confidence interval
##      lower      upper 
## -0.5665055  0.2485458
rdd_result_plot_4 <- gp_rdd_plot(rdd_res_absenteeism_BTB_75_cutoff) +
                      geom_vline(xintercept = 75, linetype = "dashed") +
                      coord_cartesian(xlim = c(20, 60)) +
                      labs(title = "Zoomed-In View Around the Cutoff")
print(rdd_result_plot_4)

GP RDD - chronic_abseentism ~ Unduplicated Pupil % - 55% cut off

rdd_res_absenteeism_undup_55_cutoff <- gp_rdd(
  df_clean$undup_pct,
  df_clean$chronic_absenteeism,
  55
)

rdd_res_absenteeism_undup_55_cutoff$tau
## [1] 3.060495
rdd_res_absenteeism_undup_55_cutoff$se
## [1] 7.58411
rdd_res_absenteeism_undup_55_cutoff$ci
##     lower     upper 
## -11.80409  17.92508
rdd_plot_undup_55_absent <- gp_rdd_plot(rdd_res_absenteeism_undup_55_cutoff) +
  geom_vline(xintercept = 55, linetype = "dashed") +
  coord_cartesian(xlim = c(30, 80)) +
  labs(title = "Chronic Absenteeism ~ Unduplicated % (55% Cutoff)")
print(rdd_plot_undup_55_absent)

# GP RDD - chronic_abseentism ~ Unduplicated Pupil % - 75% cut off

rdd_res_absenteeism_undup_75_cutoff <- gp_rdd(
  df_clean$undup_pct,
  df_clean$chronic_absenteeism,
  75
)

rdd_res_absenteeism_undup_75_cutoff$tau
## [1] -1.667907
rdd_res_absenteeism_undup_75_cutoff$se
## [1] 6.675245
rdd_res_absenteeism_undup_75_cutoff$ci
##     lower     upper 
## -14.75115  11.41533
rdd_plot_undup_75_absent <- gp_rdd_plot(rdd_res_absenteeism_undup_75_cutoff) +
  geom_vline(xintercept = 75, linetype = "dashed") +
  coord_cartesian(xlim = c(50, 95)) +
  labs(title = "Chronic Absenteeism ~ Unduplicated % (75% Cutoff)")
print(rdd_plot_undup_75_absent)

GP RDD - BTB ~ Unduplicated Pupil % - 55% cut off

rdd_res_btb_undup_55_cutoff <- gp_rdd(
  df_clean$undup_pct,
  df_clean$btb,
  55
)

rdd_res_btb_undup_55_cutoff$tau
## [1] -0.02215456
rdd_res_btb_undup_55_cutoff$se
## [1] 0.2609895
rdd_res_btb_undup_55_cutoff$ci
##      lower      upper 
## -0.5336846  0.4893755
rdd_plot_undup_55_btb <- gp_rdd_plot(rdd_res_btb_undup_55_cutoff) +
  geom_vline(xintercept = 55, linetype = "dashed") +
  coord_cartesian(xlim = c(30, 80)) +
  labs(title = "BTB Participation ~ Unduplicated % (55% Cutoff)")
print(rdd_plot_undup_55_btb)

GP RDD - BTB ~ Unduplicated Pupil % - 75% cut off

rdd_res_btb_undup_75_cutoff <- gp_rdd(
  df_clean$undup_pct,
  df_clean$btb,
  75
)

rdd_res_btb_undup_75_cutoff$tau
## [1] 0.094369
rdd_res_btb_undup_75_cutoff$se
## [1] 0.2249487
rdd_res_btb_undup_75_cutoff$ci
##      lower      upper 
## -0.3465223  0.5352603
rdd_plot_undup_75_btb <- gp_rdd_plot(rdd_res_btb_undup_75_cutoff) +
  geom_vline(xintercept = 75, linetype = "dashed") +
  coord_cartesian(xlim = c(50, 95)) +
  labs(title = "BTB Participation ~ Unduplicated % (75% Cutoff)")
print(rdd_plot_undup_75_btb)

GP RDD - avg_pct_met_above_ELA ~ FRPM % - 35 cutoff

rdd_avg_pct_met_above_ela_frpm_35 <- gp_rdd(df_clean$frpm_percent, df_clean$avg_pct_met_above_ELA, 35)
rdd_avg_pct_met_above_ela_frpm_35$tau
## [1] -14.90419
rdd_avg_pct_met_above_ela_frpm_35$se
## [1] 6.742368
rdd_avg_pct_met_above_ela_frpm_35$ci
##      lower      upper 
## -28.118984  -1.689388
rdd_plot_avg_pct_met_above_ela_frpm_35 <- gp_rdd_plot(rdd_avg_pct_met_above_ela_frpm_35) +
  geom_vline(xintercept = 35, linetype = "dashed") +
  coord_cartesian(xlim = c(20, 60)) +
  labs(title = "% MetAbove ELA ~ FRPM (35% Cutoff)")
print(rdd_plot_avg_pct_met_above_ela_frpm_35)

GP RDD - avg_pct_met_above_ELA ~ FRPM % - 75 cutoff

rdd_avg_pct_met_above_ela_frpm_75 <- gp_rdd(df_clean$frpm_percent, df_clean$avg_pct_met_above_ELA, 75)
rdd_avg_pct_met_above_ela_frpm_75$tau
## [1] 1.103441
rdd_avg_pct_met_above_ela_frpm_75$se
## [1] 6.080521
rdd_avg_pct_met_above_ela_frpm_75$ci
##     lower     upper 
## -10.81416  13.02104
rdd_plot_avg_pct_met_above_ela_frpm_75 <- gp_rdd_plot(rdd_avg_pct_met_above_ela_frpm_75) +
  geom_vline(xintercept = 75, linetype = "dashed") +
  coord_cartesian(xlim = c(50, 95)) +
  labs(title = "% MetAbove ELA ~ FRPM (75% Cutoff)")
print(rdd_plot_avg_pct_met_above_ela_frpm_75)

GP RDD - avg_pct_met_above_ELA ~ Undup % - 55 cutoff

rdd_avg_pct_met_above_ela_undup_55 <- gp_rdd(df_clean$undup_pct, df_clean$avg_pct_met_above_ELA, 55)
rdd_avg_pct_met_above_ela_undup_55$tau
## [1] -6.078043
rdd_avg_pct_met_above_ela_undup_55$se
## [1] 7.770916
rdd_avg_pct_met_above_ela_undup_55$ci
##      lower      upper 
## -21.308759   9.152673
rdd_plot_avg_pct_met_above_ela_undup_55 <- gp_rdd_plot(rdd_avg_pct_met_above_ela_undup_55) +
  geom_vline(xintercept = 55, linetype = "dashed") +
  coord_cartesian(xlim = c(30, 80)) +
  labs(title = "% MetAbove ELA ~ Undup (55% Cutoff)")
print(rdd_plot_avg_pct_met_above_ela_undup_55)

GP RDD - avg_pct_met_above_ELA ~ Undup % - 75 cutoff

rdd_avg_pct_met_above_ela_undup_75 <- gp_rdd(df_clean$undup_pct, df_clean$avg_pct_met_above_ELA, 75)
rdd_avg_pct_met_above_ela_undup_75$tau
## [1] -5.278574
rdd_avg_pct_met_above_ela_undup_75$se
## [1] 6.61937
rdd_avg_pct_met_above_ela_undup_75$ci
##      lower      upper 
## -18.252301   7.695153
rdd_plot_avg_pct_met_above_ela_undup_75 <- gp_rdd_plot(rdd_avg_pct_met_above_ela_undup_75) +
  geom_vline(xintercept = 75, linetype = "dashed") +
  coord_cartesian(xlim = c(50, 95)) +
  labs(title = "% MetAbove ELA ~ Undup (75% Cutoff)")
print(rdd_plot_avg_pct_met_above_ela_undup_75)

GP RDD - avg_pct_met_above_Math ~ FRPM % - 35 cutoff

rdd_avg_pct_met_above_math_frpm_35 <- gp_rdd(df_clean$frpm_percent, df_clean$avg_pct_met_above_Math, 35)
rdd_avg_pct_met_above_math_frpm_35$tau
## [1] -18.51735
rdd_avg_pct_met_above_math_frpm_35$se
## [1] 7.425478
rdd_avg_pct_met_above_math_frpm_35$ci
##      lower      upper 
## -33.071017  -3.963677
rdd_plot_avg_pct_met_above_math_frpm_35 <- gp_rdd_plot(rdd_avg_pct_met_above_math_frpm_35) +
  geom_vline(xintercept = 35, linetype = "dashed") +
  coord_cartesian(xlim = c(20, 60)) +
  labs(title = "% MetAbove Math ~ FRPM (35% Cutoff)")
print(rdd_plot_avg_pct_met_above_math_frpm_35)

GP RDD - avg_pct_met_above_Math ~ FRPM % - 75 cutoff

rdd_avg_pct_met_above_math_frpm_75 <- gp_rdd(df_clean$frpm_percent, df_clean$avg_pct_met_above_Math, 75)
rdd_avg_pct_met_above_math_frpm_75$tau
## [1] -0.3246283
rdd_avg_pct_met_above_math_frpm_75$se
## [1] 5.969782
rdd_avg_pct_met_above_math_frpm_75$ci
##     lower     upper 
## -12.02519  11.37593
rdd_plot_avg_pct_met_above_math_frpm_75 <- gp_rdd_plot(rdd_avg_pct_met_above_math_frpm_75) +
  geom_vline(xintercept = 75, linetype = "dashed") +
  coord_cartesian(xlim = c(50, 95)) +
  labs(title = "% MetAbove Math ~ FRPM (75% Cutoff)")
print(rdd_plot_avg_pct_met_above_math_frpm_75)

GP RDD - avg_pct_met_above_Math ~ Undup % - 55 cutoff

rdd_avg_pct_met_above_math_undup_55 <- gp_rdd(df_clean$undup_pct, df_clean$avg_pct_met_above_Math, 55)
rdd_avg_pct_met_above_math_undup_55$tau
## [1] -4.556328
rdd_avg_pct_met_above_math_undup_55$se
## [1] 7.710299
rdd_avg_pct_met_above_math_undup_55$ci
##     lower     upper 
## -19.66824  10.55558
rdd_plot_avg_pct_met_above_math_undup_55 <- gp_rdd_plot(rdd_avg_pct_met_above_math_undup_55) +
  geom_vline(xintercept = 55, linetype = "dashed") +
  coord_cartesian(xlim = c(30, 80)) +
  labs(title = "% MetAbove Math ~ Undup (55% Cutoff)")
print(rdd_plot_avg_pct_met_above_math_undup_55)

GP RDD - avg_pct_met_above_Math ~ Undup % - 75 cutoff

rdd_avg_pct_met_above_math_undup_75 <- gp_rdd(df_clean$undup_pct, df_clean$avg_pct_met_above_Math, 75)
rdd_avg_pct_met_above_math_undup_75$tau
## [1] -1.108402
rdd_avg_pct_met_above_math_undup_75$se
## [1] 6.384361
rdd_avg_pct_met_above_math_undup_75$ci
##     lower     upper 
## -13.62152  11.40471
rdd_plot_avg_pct_met_above_math_undup_75 <- gp_rdd_plot(rdd_avg_pct_met_above_math_undup_75) +
  geom_vline(xintercept = 75, linetype = "dashed") +
  coord_cartesian(xlim = c(50, 95)) +
  labs(title = "% MetAbove Math ~ Undup (75% Cutoff)")
print(rdd_plot_avg_pct_met_above_math_undup_75)

GP RDD - avg_pct_not_met_ELA ~ FRPM % - 35 cutoff

rdd_avg_pct_not_met_ela_frpm_35 <- gp_rdd(df_clean$frpm_percent, df_clean$avg_pct_not_met_ELA, 35)
rdd_avg_pct_not_met_ela_frpm_35$tau
## [1] 11.0354
rdd_avg_pct_not_met_ela_frpm_35$se
## [1] 5.596307
rdd_avg_pct_not_met_ela_frpm_35$ci
##       lower       upper 
##  0.06683512 22.00395665
rdd_plot_avg_pct_not_met_ela_frpm_35 <- gp_rdd_plot(rdd_avg_pct_not_met_ela_frpm_35) +
  geom_vline(xintercept = 35, linetype = "dashed") +
  coord_cartesian(xlim = c(20, 60)) +
  labs(title = "% Not Met ELA ~ FRPM (35% Cutoff)")
print(rdd_plot_avg_pct_not_met_ela_frpm_35)

GP RDD - avg_pct_not_met_ELA ~ FRPM % - 75 cutoff

rdd_avg_pct_not_met_ela_frpm_75 <- gp_rdd(df_clean$frpm_percent, df_clean$avg_pct_not_met_ELA, 75)
rdd_avg_pct_not_met_ela_frpm_75$tau
## [1] -2.400395
rdd_avg_pct_not_met_ela_frpm_75$se
## [1] 5.939738
rdd_avg_pct_not_met_ela_frpm_75$ci
##      lower      upper 
## -14.042066   9.241277
rdd_plot_avg_pct_not_met_ela_frpm_75 <- gp_rdd_plot(rdd_avg_pct_not_met_ela_frpm_75) +
  geom_vline(xintercept = 75, linetype = "dashed") +
  coord_cartesian(xlim = c(50, 95)) +
  labs(title = "% Not Met ELA ~ FRPM (75% Cutoff)")
print(rdd_plot_avg_pct_not_met_ela_frpm_75)

GP RDD - avg_pct_not_met_ELA ~ Undup % - 55 cutoff

rdd_avg_pct_not_met_ela_undup_55 <- gp_rdd(df_clean$undup_pct, df_clean$avg_pct_not_met_ELA, 55)
rdd_avg_pct_not_met_ela_undup_55$tau
## [1] 5.978147
rdd_avg_pct_not_met_ela_undup_55$se
## [1] 7.40842
rdd_avg_pct_not_met_ela_undup_55$ci
##    lower    upper 
## -8.54209 20.49838
rdd_plot_avg_pct_not_met_ela_undup_55 <- gp_rdd_plot(rdd_avg_pct_not_met_ela_undup_55) +
  geom_vline(xintercept = 55, linetype = "dashed") +
  coord_cartesian(xlim = c(30, 80)) +
  labs(title = "% Not Met ELA ~ Undup (55% Cutoff)")
print(rdd_plot_avg_pct_not_met_ela_undup_55)

GP RDD - avg_pct_not_met_ELA ~ Undup % - 75 cutoff

rdd_avg_pct_not_met_ela_undup_75 <- gp_rdd(df_clean$undup_pct, df_clean$avg_pct_not_met_ELA, 75)
rdd_avg_pct_not_met_ela_undup_75$tau
## [1] 3.960924
rdd_avg_pct_not_met_ela_undup_75$se
## [1] 6.545555
rdd_avg_pct_not_met_ela_undup_75$ci
##     lower     upper 
## -8.868129 16.789977
rdd_plot_avg_pct_not_met_ela_undup_75 <- gp_rdd_plot(rdd_avg_pct_not_met_ela_undup_75) +
  geom_vline(xintercept = 75, linetype = "dashed") +
  coord_cartesian(xlim = c(50, 95)) +
  labs(title = "% Not Met ELA ~ Undup (75% Cutoff)")
print(rdd_plot_avg_pct_not_met_ela_undup_75)

GP RDD - avg_pct_not_met_Math ~ FRPM % - 35 cutoff

rdd_avg_pct_not_met_math_frpm_35 <- gp_rdd(df_clean$frpm_percent, df_clean$avg_pct_not_met_Math, 35)
rdd_avg_pct_not_met_math_frpm_35$tau
## [1] 14.2913
rdd_avg_pct_not_met_math_frpm_35$se
## [1] 6.88293
rdd_avg_pct_not_met_math_frpm_35$ci
##     lower     upper 
##  0.801007 27.781598
rdd_plot_avg_pct_not_met_math_frpm_35 <- gp_rdd_plot(rdd_avg_pct_not_met_math_frpm_35) +
  geom_vline(xintercept = 35, linetype = "dashed") +
  coord_cartesian(xlim = c(20, 60)) +
  labs(title = "% Not Met Math ~ FRPM (35% Cutoff)")
print(rdd_plot_avg_pct_not_met_math_frpm_35)

GP RDD - avg_pct_not_met_Math ~ FRPM % - 75 cutoff

rdd_avg_pct_not_met_math_frpm_75 <- gp_rdd(df_clean$frpm_percent, df_clean$avg_pct_not_met_Math, 75)
rdd_avg_pct_not_met_math_frpm_75$tau
## [1] -3.591685
rdd_avg_pct_not_met_math_frpm_75$se
## [1] 7.159961
rdd_avg_pct_not_met_math_frpm_75$ci
##     lower     upper 
## -17.62495  10.44158
rdd_plot_avg_pct_not_met_math_frpm_75 <- gp_rdd_plot(rdd_avg_pct_not_met_math_frpm_75) +
  geom_vline(xintercept = 75, linetype = "dashed") +
  coord_cartesian(xlim = c(50, 95)) +
  labs(title = "% Not Met Math ~ FRPM (75% Cutoff)")
print(rdd_plot_avg_pct_not_met_math_frpm_75)

GP RDD - avg_pct_not_met_Math ~ Undup % - 55 cutoff

rdd_avg_pct_not_met_math_undup_55 <- gp_rdd(df_clean$undup_pct, df_clean$avg_pct_not_met_Math, 55)
rdd_avg_pct_not_met_math_undup_55$tau
## [1] 4.95941
rdd_avg_pct_not_met_math_undup_55$se
## [1] 8.965036
rdd_avg_pct_not_met_math_undup_55$ci
##     lower     upper 
## -12.61174  22.53056
rdd_plot_avg_pct_not_met_math_undup_55 <- gp_rdd_plot(rdd_avg_pct_not_met_math_undup_55) +
  geom_vline(xintercept = 55, linetype = "dashed") +
  coord_cartesian(xlim = c(30, 80)) +
  labs(title = "% Not Met Math ~ Undup (55% Cutoff)")
print(rdd_plot_avg_pct_not_met_math_undup_55)

GP RDD - avg_pct_not_met_Math ~ Undup % - 75 cutoff

rdd_avg_pct_not_met_math_undup_75 <- gp_rdd(df_clean$undup_pct, df_clean$avg_pct_not_met_Math, 75)
rdd_avg_pct_not_met_math_undup_75$tau
## [1] 1.601258
rdd_avg_pct_not_met_math_undup_75$se
## [1] 7.802175
rdd_avg_pct_not_met_math_undup_75$ci
##     lower     upper 
## -13.69072  16.89324
rdd_plot_avg_pct_not_met_math_undup_75 <- gp_rdd_plot(rdd_avg_pct_not_met_math_undup_75) +
  geom_vline(xintercept = 75, linetype = "dashed") +
  coord_cartesian(xlim = c(50, 95)) +
  labs(title = "% Not Met Math ~ Undup (75% Cutoff)")
print(rdd_plot_avg_pct_not_met_math_undup_75)

GP RDD - avg_pct_not_met_Math ~ FRPM % - 35 cutoff

rdd_avg_pct_not_met_math_frpm_35 <- gp_rdd(df_clean$frpm_percent, df_clean$avg_pct_not_met_Math, 35)
rdd_avg_pct_not_met_math_frpm_35$tau
## [1] 14.2913
rdd_avg_pct_not_met_math_frpm_35$se
## [1] 6.88293
rdd_avg_pct_not_met_math_frpm_35$ci
##     lower     upper 
##  0.801007 27.781598
rdd_plot_avg_pct_not_met_math_frpm_35 <- gp_rdd_plot(rdd_avg_pct_not_met_math_frpm_35) +
  geom_vline(xintercept = 35, linetype = "dashed") +
  coord_cartesian(xlim = c(20, 60)) +
  labs(title = "% Not Met Math ~ FRPM (35% Cutoff)")
print(rdd_plot_avg_pct_not_met_math_frpm_35)

GP RDD - avg_pct_not_met_Math ~ FRPM % - 75 cutoff

rdd_avg_pct_not_met_math_frpm_75 <- gp_rdd(df_clean$frpm_percent, df_clean$avg_pct_not_met_Math, 75)
rdd_avg_pct_not_met_math_frpm_75$tau
## [1] -3.591685
rdd_avg_pct_not_met_math_frpm_75$se
## [1] 7.159961
rdd_avg_pct_not_met_math_frpm_75$ci
##     lower     upper 
## -17.62495  10.44158
rdd_plot_avg_pct_not_met_math_frpm_75 <- gp_rdd_plot(rdd_avg_pct_not_met_math_frpm_75) +
  geom_vline(xintercept = 75, linetype = "dashed") +
  coord_cartesian(xlim = c(50, 95)) +
  labs(title = "% Not Met Math ~ FRPM (75% Cutoff)")
print(rdd_plot_avg_pct_not_met_math_frpm_75)

GP RDD - avg_pct_not_met_Math ~ Undup % - 55 cutoff

rdd_avg_pct_not_met_math_undup_55 <- gp_rdd(df_clean$undup_pct, df_clean$avg_pct_not_met_Math, 55)
rdd_avg_pct_not_met_math_undup_55$tau
## [1] 4.95941
rdd_avg_pct_not_met_math_undup_55$se
## [1] 8.965036
rdd_avg_pct_not_met_math_undup_55$ci
##     lower     upper 
## -12.61174  22.53056
rdd_plot_avg_pct_not_met_math_undup_55 <- gp_rdd_plot(rdd_avg_pct_not_met_math_undup_55) +
  geom_vline(xintercept = 55, linetype = "dashed") +
  coord_cartesian(xlim = c(30, 80)) +
  labs(title = "% Not Met Math ~ Undup (55% Cutoff)")
print(rdd_plot_avg_pct_not_met_math_undup_55)

GP RDD - avg_pct_not_met_Math ~ Undup % - 75 cutoff

rdd_avg_pct_not_met_math_undup_75 <- gp_rdd(df_clean$undup_pct, df_clean$avg_pct_not_met_Math, 75)
rdd_avg_pct_not_met_math_undup_75$tau
## [1] 1.601258
rdd_avg_pct_not_met_math_undup_75$se
## [1] 7.802175
rdd_avg_pct_not_met_math_undup_75$ci
##     lower     upper 
## -13.69072  16.89324
rdd_plot_avg_pct_not_met_math_undup_75 <- gp_rdd_plot(rdd_avg_pct_not_met_math_undup_75) +
  geom_vline(xintercept = 75, linetype = "dashed") +
  coord_cartesian(xlim = c(50, 95)) +
  labs(title = "% Not Met Math ~ Undup (75% Cutoff)")
print(rdd_plot_avg_pct_not_met_math_undup_75)

GP RDD - avg_scale_score_ELA ~ FRPM % - 35 cutoff

rdd_avg_scale_score_ela_frpm_35 <- gp_rdd(df_clean$frpm_percent, df_clean$avg_scale_score_ELA, 35)
rdd_avg_scale_score_ela_frpm_35$tau
## [1] -27.88893
rdd_avg_scale_score_ela_frpm_35$se
## [1] 26.80531
rdd_avg_scale_score_ela_frpm_35$ci
##     lower     upper 
## -80.42637  24.64851
rdd_plot_avg_scale_score_ela_frpm_35 <- gp_rdd_plot(rdd_avg_scale_score_ela_frpm_35) +
  geom_vline(xintercept = 35, linetype = "dashed") +
  coord_cartesian(xlim = c(20, 60)) +
  labs(title = "Scale Score ELA ~ FRPM (35% Cutoff)")
print(rdd_plot_avg_scale_score_ela_frpm_35)

GP RDD - avg_scale_score_ELA ~ FRPM % - 75 cutoff

rdd_avg_scale_score_ela_frpm_75 <- gp_rdd(df_clean$frpm_percent, df_clean$avg_scale_score_ELA, 75)
rdd_avg_scale_score_ela_frpm_75$tau
## [1] -0.052738
rdd_avg_scale_score_ela_frpm_75$se
## [1] 26.98478
rdd_avg_scale_score_ela_frpm_75$ci
##     lower     upper 
## -52.94193  52.83646
rdd_plot_avg_scale_score_ela_frpm_75 <- gp_rdd_plot(rdd_avg_scale_score_ela_frpm_75) +
  geom_vline(xintercept = 75, linetype = "dashed") +
  coord_cartesian(xlim = c(50, 95)) +
  labs(title = "Scale Score ELA ~ FRPM (75% Cutoff)")
print(rdd_plot_avg_scale_score_ela_frpm_75)

GP RDD - avg_scale_score_ELA ~ Undup % - 55 cutoff

rdd_avg_scale_score_ela_undup_55 <- gp_rdd(df_clean$undup_pct, df_clean$avg_scale_score_ELA, 55)
rdd_avg_scale_score_ela_undup_55$tau
## [1] -15.09553
rdd_avg_scale_score_ela_undup_55$se
## [1] 33.50612
rdd_avg_scale_score_ela_undup_55$ci
##     lower     upper 
## -80.76632  50.57525
rdd_plot_avg_scale_score_ela_undup_55 <- gp_rdd_plot(rdd_avg_scale_score_ela_undup_55) +
  geom_vline(xintercept = 55, linetype = "dashed") +
  coord_cartesian(xlim = c(30, 80)) +
  labs(title = "Scale Score ELA ~ Undup (55% Cutoff)")
print(rdd_plot_avg_scale_score_ela_undup_55)

GP RDD - avg_scale_score_ELA ~ Undup % - 75 cutoff

rdd_avg_scale_score_ela_undup_75 <- gp_rdd(df_clean$undup_pct, df_clean$avg_scale_score_ELA, 75)
rdd_avg_scale_score_ela_undup_75$tau
## [1] -30.14377
rdd_avg_scale_score_ela_undup_75$se
## [1] 29.37465
rdd_avg_scale_score_ela_undup_75$ci
##     lower     upper 
## -87.71703  27.42948
rdd_plot_avg_scale_score_ela_undup_75 <- gp_rdd_plot(rdd_avg_scale_score_ela_undup_75) +
  geom_vline(xintercept = 75, linetype = "dashed") +
  coord_cartesian(xlim = c(50, 95)) +
  labs(title = "Scale Score ELA ~ Undup (75% Cutoff)")
print(rdd_plot_avg_scale_score_ela_undup_75)

GP RDD - avg_scale_score_Math ~ FRPM % - 35 cutoff

rdd_avg_scale_score_math_frpm_35 <- gp_rdd(df_clean$frpm_percent, df_clean$avg_scale_score_Math, 35)
rdd_avg_scale_score_math_frpm_35$tau
## [1] -35.14925
rdd_avg_scale_score_math_frpm_35$se
## [1] 22.46183
rdd_avg_scale_score_math_frpm_35$ci
##      lower      upper 
## -79.173619   8.875122
rdd_plot_avg_scale_score_math_frpm_35 <- gp_rdd_plot(rdd_avg_scale_score_math_frpm_35) +
  geom_vline(xintercept = 35, linetype = "dashed") +
  coord_cartesian(xlim = c(20, 60)) +
  labs(title = "Scale Score Math ~ FRPM (35% Cutoff)")
print(rdd_plot_avg_scale_score_math_frpm_35)

GP RDD - avg_scale_score_Math ~ FRPM % - 75 cutoff

rdd_avg_scale_score_math_frpm_75 <- gp_rdd(df_clean$frpm_percent, df_clean$avg_scale_score_Math, 75)
rdd_avg_scale_score_math_frpm_75$tau
## [1] -3.229631
rdd_avg_scale_score_math_frpm_75$se
## [1] 19.66241
rdd_avg_scale_score_math_frpm_75$ci
##     lower     upper 
## -41.76724  35.30798
rdd_plot_avg_scale_score_math_frpm_75 <- gp_rdd_plot(rdd_avg_scale_score_math_frpm_75) +
  geom_vline(xintercept = 75, linetype = "dashed") +
  coord_cartesian(xlim = c(50, 95)) +
  labs(title = "Scale Score Math ~ FRPM (75% Cutoff)")
print(rdd_plot_avg_scale_score_math_frpm_75)

GP RDD - avg_scale_score_Math ~ Undup % - 55 cutoff

rdd_avg_scale_score_math_undup_55 <- gp_rdd(df_clean$undup_pct, df_clean$avg_scale_score_Math, 55)
rdd_avg_scale_score_math_undup_55$tau
## [1] -9.742052
rdd_avg_scale_score_math_undup_55$se
## [1] 25.18616
rdd_avg_scale_score_math_undup_55$ci
##     lower     upper 
## -59.10602  39.62191
rdd_plot_avg_scale_score_math_undup_55 <- gp_rdd_plot(rdd_avg_scale_score_math_undup_55) +
  geom_vline(xintercept = 55, linetype = "dashed") +
  coord_cartesian(xlim = c(30, 80)) +
  labs(title = "Scale Score Math ~ Undup (55% Cutoff)")
print(rdd_plot_avg_scale_score_math_undup_55)

GP RDD - avg_scale_score_Math ~ Undup % - 75 cutoff

rdd_avg_scale_score_math_undup_75 <- gp_rdd(df_clean$undup_pct, df_clean$avg_scale_score_Math, 75)
rdd_avg_scale_score_math_undup_75$tau
## [1] -26.1681
rdd_avg_scale_score_math_undup_75$se
## [1] 21.30703
rdd_avg_scale_score_math_undup_75$ci
##    lower    upper 
## -67.9291  15.5929
rdd_plot_avg_scale_score_math_undup_75 <- gp_rdd_plot(rdd_avg_scale_score_math_undup_75) +
  geom_vline(xintercept = 75, linetype = "dashed") +
  coord_cartesian(xlim = c(50, 95)) +
  labs(title = "Scale Score Math ~ Undup (75% Cutoff)")
print(rdd_plot_avg_scale_score_math_undup_75)

# Outcome Summary Table

# Utility function to extract results from a gp_rdd object
extract_rdd_result <- function(obj, label) {
  tibble(
    model = label,
    tau = obj$tau,
    se = obj$se,
    ci_lower = obj$ci[1],
    ci_upper = obj$ci[2],
    p_value = 2 * pnorm(-abs(obj$tau / obj$se)),
    significant = ifelse(p_value < 0.05, TRUE, FALSE)
  )
}

# Combine all results into one table
gp_rdd_summary_table <- bind_rows(
  extract_rdd_result(rdd_avg_pct_met_above_ela_frpm_35, "% MetAbove ELA ~ FRPM (35%)"),
  extract_rdd_result(rdd_avg_pct_met_above_ela_frpm_75, "% MetAbove ELA ~ FRPM (75%)"),
  extract_rdd_result(rdd_avg_pct_met_above_ela_undup_55, "% MetAbove ELA ~ Undup (55%)"),
  extract_rdd_result(rdd_avg_pct_met_above_ela_undup_75, "% MetAbove ELA ~ Undup (75%)"),
  extract_rdd_result(rdd_avg_pct_met_above_math_frpm_35, "% MetAbove Math ~ FRPM (35%)"),
  extract_rdd_result(rdd_avg_pct_met_above_math_frpm_75, "% MetAbove Math ~ FRPM (75%)"),
  extract_rdd_result(rdd_avg_pct_met_above_math_undup_55, "% MetAbove Math ~ Undup (55%)"),
  extract_rdd_result(rdd_avg_pct_met_above_math_undup_75, "% MetAbove Math ~ Undup (75%)"),
  extract_rdd_result(rdd_res_absenteeism_frpm_35_cutoff, "Chronic Absenteeism ~ FRPM (35%)"),
  extract_rdd_result(rdd_res_absenteeism_frpm_75_cutoff, "Chronic Absenteeism ~ FRPM (75%)"),
  extract_rdd_result(rdd_res_absenteeism_BTB_35_cutoff, "BTB ~ FRPM (35%)"),
  extract_rdd_result(rdd_res_absenteeism_BTB_75_cutoff, "BTB ~ FRPM (75%)"),
  extract_rdd_result(rdd_res_absenteeism_undup_55_cutoff, "Chronic Absenteeism ~ Undup (55%)"),
  extract_rdd_result(rdd_res_absenteeism_undup_75_cutoff, "Chronic Absenteeism ~ Undup (75%)"),
  extract_rdd_result(rdd_avg_pct_not_met_math_frpm_35, "% Not Met Math ~ FRPM (35%)"),
  extract_rdd_result(rdd_avg_pct_not_met_math_frpm_75, "% Not Met Math ~ FRPM (75%)"),
  extract_rdd_result(rdd_avg_pct_not_met_math_undup_55, "% Not Met Math ~ Undup (55%)"),
  extract_rdd_result(rdd_avg_pct_not_met_math_undup_75, "% Not Met Math ~ Undup (75%)"),
  extract_rdd_result(rdd_avg_scale_score_ela_frpm_35, "Scale ELA ~ FRPM (35%)"),
  extract_rdd_result(rdd_avg_scale_score_ela_frpm_75, "Scale ELA ~ FRPM (75%)"),
  extract_rdd_result(rdd_avg_scale_score_ela_undup_55, "Scale ELA ~ Undup (55%)"),
  extract_rdd_result(rdd_avg_scale_score_ela_undup_75, "Scale ELA ~ Undup (75%)"),
  extract_rdd_result(rdd_avg_scale_score_math_frpm_35, "Scale Math ~ FRPM (35%)"),
  extract_rdd_result(rdd_avg_scale_score_math_frpm_75, "Scale Math ~ FRPM (75%)"),
  extract_rdd_result(rdd_avg_scale_score_math_undup_55, "Scale Math ~ Undup (55%)"),
  extract_rdd_result(rdd_avg_scale_score_math_undup_75, "Scale Math ~ Undup (75%)")
)

print(gp_rdd_summary_table, n = Inf)
## # A tibble: 26 × 7
##    model                        tau     se ci_lower ci_upper p_value significant
##    <chr>                      <dbl>  <dbl>    <dbl>    <dbl>   <dbl> <lgl>      
##  1 % MetAbove ELA ~ FRPM … -14.9     6.74   -28.1     -1.69   0.0271 TRUE       
##  2 % MetAbove ELA ~ FRPM …   1.10    6.08   -10.8     13.0    0.856  FALSE      
##  3 % MetAbove ELA ~ Undup…  -6.08    7.77   -21.3      9.15   0.434  FALSE      
##  4 % MetAbove ELA ~ Undup…  -5.28    6.62   -18.3      7.70   0.425  FALSE      
##  5 % MetAbove Math ~ FRPM… -18.5     7.43   -33.1     -3.96   0.0126 TRUE       
##  6 % MetAbove Math ~ FRPM…  -0.325   5.97   -12.0     11.4    0.957  FALSE      
##  7 % MetAbove Math ~ Undu…  -4.56    7.71   -19.7     10.6    0.555  FALSE      
##  8 % MetAbove Math ~ Undu…  -1.11    6.38   -13.6     11.4    0.862  FALSE      
##  9 Chronic Absenteeism ~ …   7.87    5.74    -3.38    19.1    0.171  FALSE      
## 10 Chronic Absenteeism ~ …  -0.0740  6.06   -12.0     11.8    0.990  FALSE      
## 11 BTB ~ FRPM (35%)         -0.231   0.224   -0.669    0.208  0.303  FALSE      
## 12 BTB ~ FRPM (75%)         -0.159   0.208   -0.567    0.249  0.445  FALSE      
## 13 Chronic Absenteeism ~ …   3.06    7.58   -11.8     17.9    0.687  FALSE      
## 14 Chronic Absenteeism ~ …  -1.67    6.68   -14.8     11.4    0.803  FALSE      
## 15 % Not Met Math ~ FRPM …  14.3     6.88     0.801   27.8    0.0379 TRUE       
## 16 % Not Met Math ~ FRPM …  -3.59    7.16   -17.6     10.4    0.616  FALSE      
## 17 % Not Met Math ~ Undup…   4.96    8.97   -12.6     22.5    0.580  FALSE      
## 18 % Not Met Math ~ Undup…   1.60    7.80   -13.7     16.9    0.837  FALSE      
## 19 Scale ELA ~ FRPM (35%)  -27.9    26.8    -80.4     24.6    0.298  FALSE      
## 20 Scale ELA ~ FRPM (75%)   -0.0527 27.0    -52.9     52.8    0.998  FALSE      
## 21 Scale ELA ~ Undup (55%) -15.1    33.5    -80.8     50.6    0.652  FALSE      
## 22 Scale ELA ~ Undup (75%) -30.1    29.4    -87.7     27.4    0.305  FALSE      
## 23 Scale Math ~ FRPM (35%) -35.1    22.5    -79.2      8.88   0.118  FALSE      
## 24 Scale Math ~ FRPM (75%)  -3.23   19.7    -41.8     35.3    0.870  FALSE      
## 25 Scale Math ~ Undup (55…  -9.74   25.2    -59.1     39.6    0.699  FALSE      
## 26 Scale Math ~ Undup (75… -26.2    21.3    -67.9     15.6    0.219  FALSE

Balance Tests with Xs

# ──────────────────────────────────────────────────────────────
# Balance Tests with GP-RDD for Two Running Variables (FRPM and UPP)
# ──────────────────────────────────────────────────────────────
library(dplyr)
library(tidyr)
library(gpss)

# -----------------------------
# Continuous Covariates Balance via GP-RDD
# -----------------------------
gp_rdd_balance_test_cont <- function(df, var, running_var, cutoff) {
  df_sub <- df %>% filter(!is.na(.data[[var]]), !is.na(.data[[running_var]]))

  rdd_result <- gp_rdd(df_sub[[running_var]], df_sub[[var]], cutoff)

  tibble(
    variable = var,
    running_var = running_var,
    cutoff = cutoff,
    tau = rdd_result$tau,
    se = rdd_result$se,
    p_value = 2 * pnorm(-abs(rdd_result$tau / rdd_result$se)),
    ci_lower = rdd_result$ci[1],
    ci_upper = rdd_result$ci[2]
  )
}

# -----------------------------
# Binary Categorical Covariates Balance via GP-RDD
# -----------------------------
gp_rdd_balance_test_binary <- function(df, var, running_var, cutoff) {
  df_sub <- df %>% filter(!is.na(.data[[var]]), !is.na(.data[[running_var]]))
  df_sub <- df_sub %>% mutate(dummy = as.numeric(trimws(.data[[var]]) == "Yes" | .data[[var]] == 1))

  rdd_result <- gp_rdd(df_sub[[running_var]], df_sub$dummy, cutoff)

  tibble(
    variable = var,
    running_var = running_var,
    cutoff = cutoff,
    tau = rdd_result$tau,
    se = rdd_result$se,
    p_value = 2 * pnorm(-abs(rdd_result$tau / rdd_result$se)),
    ci_lower = rdd_result$ci[1],
    ci_upper = rdd_result$ci[2]
  )
}

# -----------------------------
# Run All Balance Tests
# -----------------------------
continuous_covariates <- c("pct_hispanic", "pct_black", "pct_white", "pct_asian", 
                           "pct_two_or_more", "pct_other", "total_enroll")
categorical_covariates <- c("Charter.School", "DASS")

cutoffs <- list(
  list(running = "frpm_rate", value = 0.35),
  list(running = "frpm_rate", value = 0.75),
  list(running = "undup_pct", value = 55),
  list(running = "undup_pct", value = 75)
)

balance_all <- purrr::map_dfr(cutoffs, function(cut) {
  cont_results <- purrr::map_dfr(continuous_covariates, ~gp_rdd_balance_test_cont(df_clean, .x, cut$running, cut$value))
  cat_results  <- purrr::map_dfr(categorical_covariates, ~gp_rdd_balance_test_binary(df_clean, .x, cut$running, cut$value))
  bind_rows(cont_results, cat_results)
})

# -----------------------------
# Print results
# -----------------------------
print(balance_all)
## # A tibble: 36 × 8
##    variable        running_var cutoff      tau      se p_value ci_lower ci_upper
##    <chr>           <chr>        <dbl>    <dbl>   <dbl>   <dbl>    <dbl>    <dbl>
##  1 pct_hispanic    frpm_rate     0.35  1.64e+1 6.82e+0  0.0162    3.04    29.8  
##  2 pct_black       frpm_rate     0.35  3.49e+0 5.10e+0  0.493    -6.50    13.5  
##  3 pct_white       frpm_rate     0.35 -1.17e+1 6.05e+0  0.0534  -23.5      0.171
##  4 pct_asian       frpm_rate     0.35 -7.62e+0 4.42e+0  0.0848  -16.3      1.04 
##  5 pct_two_or_more frpm_rate     0.35  6.36e-1 1.77e+0  0.719    -2.83     4.10 
##  6 pct_other       frpm_rate     0.35 -5.49e-1 4.83e+0  0.910   -10.0      8.92 
##  7 total_enroll    frpm_rate     0.35 -2.10e+1 3.05e+2  0.945  -618.     576.   
##  8 Charter.School  frpm_rate     0.35  1.53e-1 2.43e-1  0.531    -0.324    0.629
##  9 DASS            frpm_rate     0.35  7.38e-3 8.48e-2  0.931    -0.159    0.174
## 10 pct_hispanic    frpm_rate     0.75  6.27e+0 7.28e+0  0.389    -8.00    20.5  
## # ℹ 26 more rows

Covariate-Adjusted RD \(𝑌∼𝐷∣𝑋\)

library(dplyr)
library(gpss)

# Step 1: Prepare data with treatment variable and convert categorical vars to factor
df_frpm_35 <- df_clean %>%
  mutate(
    treatment = as.integer(frpm_percent >= 35),
    Charter.School = factor(Charter.School),
    DASS = factor(DASS)
  ) %>%
  filter(complete.cases(across(all_of(c(
    "chronic_absenteeism", "treatment", "frpm_percent",
    "Charter.School", "DASS", "pct_hispanic", "pct_white", "pct_two_or_more"
  )))))

# Step 2: Fit covariate-adjusted GP-RDD model
model_frpm_35 <- gpss(
  chronic_absenteeism ~ treatment + frpm_percent + Charter.School + DASS +
    pct_hispanic + pct_white + pct_two_or_more,
  data = df_frpm_35
)

# Step 3: Create two counterfactual observations at the cutoff (treatment = 0 and 1)
new_data <- df_frpm_35 %>%
  slice(1) %>%
  slice(rep(1, 2)) %>%
  mutate(
    treatment = c(0, 1),
    frpm_percent = 35,
    Charter.School = factor("Yes", levels = levels(df_frpm_35$Charter.School)),
    DASS = factor("Yes", levels = levels(df_frpm_35$DASS)),
    pct_hispanic = mean(df_frpm_35$pct_hispanic, na.rm = TRUE),
    pct_white = mean(df_frpm_35$pct_white, na.rm = TRUE),
    pct_two_or_more = mean(df_frpm_35$pct_two_or_more, na.rm = TRUE)
  )

# Step 4: Predict outcomes for both counterfactuals
preds <- predict(model_frpm_35, new_data)
## Warning in formula.character(object, env = baseenv()): Using formula(x) is deprecated when x is a character vector of length > 1.
##   Consider formula(paste(x, collapse = " ")) instead.
# Step 5: Estimate treatment effect and uncertainty
mu1 <- preds[2]
mu0 <- preds[1]
tau_hat <- mu1 - mu0

# Extract posterior covariance matrix for the predicted values
post_cov <- model_frpm_35$post_cov_orig
se_tau <- sqrt(post_cov[2,2] + post_cov[1,1] - 2 * post_cov[1,2])

# 95% CI
ci_lower <- tau_hat - 1.96 * se_tau
ci_upper <- tau_hat + 1.96 * se_tau

# p-value (two-sided test)
p_value <- 2 * pnorm(-abs(tau_hat / se_tau))

# Step 6: Output all results
print(tibble(
  model = "chronic_absenteeism ~ frpm_percent adj @ 35%",
  tau = tau_hat,
  se = se_tau,
  ci_lower = ci_lower,
  ci_upper = ci_upper,
  p_value = p_value,
  significant = p_value < 0.05
))
## # A tibble: 1 × 7
##   model                          tau    se ci_lower ci_upper p_value significant
##   <chr>                        <dbl> <dbl>    <dbl>    <dbl>   <dbl> <lgl>      
## 1 chronic_absenteeism ~ frpm_…  4.89  1.83     1.31     8.47 0.00745 TRUE
library(dplyr)
library(gpss)   # >=0.2.0
library(tibble)

# ---- settings ------------------------------------------------------------
outcomes <- c(
  "chronic_absenteeism",
  "btb",
  "avg_pct_met_above_ELA",
  "avg_pct_met_above_Math",
  "avg_pct_not_met_ELA",
  "avg_pct_not_met_Math",
  "avg_scale_score_ELA",
  "avg_scale_score_Math"
)
runvar   <- "frpm_percent"
cutoff   <- 35
local_bw <- 5
cont_covs <- c("pct_hispanic", "pct_white", "pct_two_or_more")

all_results <- list()

for (outcome in outcomes) {
  # prepare data for this outcome
  df <- df_clean %>%
    mutate(
      Charter = as.numeric(trimws(Charter.School) == "Yes"),
      DASS    = as.numeric(trimws(DASS) == "Yes")
    ) %>%
    filter(!is.na(.data[[outcome]]), !is.na(.data[[runvar]]))

  # split
  left  <- df %>% filter(.data[[runvar]] < cutoff)
  right <- df %>% filter(.data[[runvar]] >= cutoff)
  if (nrow(left) < 1 || nrow(right) < 1) {
    warning("Skipping ", outcome, ": insufficient data on one side of cutoff (left=", nrow(left), ", right=", nrow(right), ")")
    next
  }

  # --- unadjusted ----------------------------------------------------------
  fml_u <- reformulate(runvar, response = outcome)
  mod_L_u <- tryCatch(gpss(fml_u, data = left, optimize = TRUE), error = function(e) { warning("gpss error (unadj) for ", outcome, ": ", e$message); return(NULL) })
  mod_R_u <- tryCatch(gpss(fml_u, data = right, optimize = TRUE), error = function(e) { warning("gpss error (unadj) for ", outcome, ": ", e$message); return(NULL) })
  if (is.null(mod_L_u) || is.null(mod_R_u)) next

  profile_u <- tibble(!!runvar := cutoff)
  pred_L_u <- gp_predict(mod_L_u, profile_u)
  pred_R_u <- gp_predict(mod_R_u, profile_u)

  if (is.null(pred_L_u$Ys_mean_orig) || is.null(pred_R_u$Ys_mean_orig) ||
      is.null(pred_L_u$Ys_cov_orig)  || is.null(pred_R_u$Ys_cov_orig)) {
    warning("Skipping unadjusted for ", outcome, ": missing prediction components")
  } else {
    mu_L_u  <- drop(pred_L_u$Ys_mean_orig)
    mu_R_u  <- drop(pred_R_u$Ys_mean_orig)
    var_L_u <- drop(pred_L_u$Ys_cov_orig)
    var_R_u <- drop(pred_R_u$Ys_cov_orig)

    tau_u  <- as.numeric(mu_R_u - mu_L_u)
    se_u   <- sqrt(var_L_u + var_R_u)
    ci_u   <- tau_u + c(-1.96, 1.96) * se_u
    pval_u <- 2 * pnorm(-abs(tau_u / se_u))

    result_unadj <- tibble(
      outcome      = outcome,
      running_var  = runvar,
      cutoff       = cutoff,
      model        = "Unadjusted",
      tau          = tau_u,
      se           = se_u,
      ci_lower     = ci_u[1],
      ci_upper     = ci_u[2],
      p_value      = pval_u,
      significant  = pval_u < 0.05
    )
    all_results <- append(all_results, list(result_unadj))
  }

  # --- covariate-adjusted --------------------------------------------------
  df_adj <- df %>% filter(complete.cases(across(all_of(c(cont_covs, "Charter", "DASS")))))
  left_a  <- df_adj %>% filter(.data[[runvar]] < cutoff)
  right_a <- df_adj %>% filter(.data[[runvar]] >= cutoff)
  if (nrow(left_a) < 1 || nrow(right_a) < 1) {
    warning("Skipping covariate-adjusted for ", outcome, ": insufficient adjusted data (left=", nrow(left_a), ", right=", nrow(right_a), ")")
    next
  }

  predictors <- c(runvar, cont_covs, "Charter", "DASS")
  fml_a <- reformulate(predictors, response = outcome)
  mod_L_a <- tryCatch(gpss(fml_a, data = left_a, optimize = TRUE), error = function(e) { warning("gpss error (adj) for ", outcome, ": ", e$message); return(NULL) })
  mod_R_a <- tryCatch(gpss(fml_a, data = right_a, optimize = TRUE), error = function(e) { warning("gpss error (adj) for ", outcome, ": ", e$message); return(NULL) })
  if (is.null(mod_L_a) || is.null(mod_R_a)) next

  profile_a <- df_adj %>%
    filter(between(.data[[runvar]], cutoff - local_bw, cutoff + local_bw)) %>%
    summarise(
      across(all_of(cont_covs), ~ mean(.x, na.rm = TRUE)),
      Charter = mean(Charter, na.rm = TRUE),
      DASS = mean(DASS, na.rm = TRUE)
    )
  profile_a[[runvar]] <- cutoff
  profile_a <- profile_a %>% select(all_of(predictors))

  pred_L_a <- gp_predict(mod_L_a, profile_a)
  pred_R_a <- gp_predict(mod_R_a, profile_a)

  if (is.null(pred_L_a$Ys_mean_orig) || is.null(pred_R_a$Ys_mean_orig) ||
      is.null(pred_L_a$Ys_cov_orig)  || is.null(pred_R_a$Ys_cov_orig)) {
    warning("Skipping covariate-adjusted for ", outcome, ": missing prediction components")
    next
  }

  mu_L_a  <- drop(pred_L_a$Ys_mean_orig)
  mu_R_a  <- drop(pred_R_a$Ys_mean_orig)
  var_L_a <- drop(pred_L_a$Ys_cov_orig)
  var_R_a <- drop(pred_R_a$Ys_cov_orig)

  tau_a  <- as.numeric(mu_R_a - mu_L_a)
  se_a   <- sqrt(var_L_a + var_R_a)
  ci_a   <- tau_a + c(-1.96, 1.96) * se_a
  pval_a <- 2 * pnorm(-abs(tau_a / se_a))

  result_adj <- tibble(
    outcome      = outcome,
    running_var  = runvar,
    cutoff       = cutoff,
    model        = "Covariate-adjusted",
    tau          = tau_a,
    se           = se_a,
    ci_lower     = ci_a[1],
    ci_upper     = ci_a[2],
    p_value      = pval_a,
    significant  = pval_a < 0.05
  )
  all_results <- append(all_results, list(result_adj))
}

# combine everything
comparison <- bind_rows(all_results)
print(comparison)
## # A tibble: 16 × 10
##    outcome    running_var cutoff model      tau     se ci_lower ci_upper p_value
##    <chr>      <chr>        <dbl> <chr>    <dbl>  <dbl>    <dbl>    <dbl>   <dbl>
##  1 chronic_a… frpm_perce…     35 Unad…   7.87   17.0     -25.5     41.3    0.644
##  2 chronic_a… frpm_perce…     35 Cova…   4.85   13.9     -22.5     32.2    0.728
##  3 btb        frpm_perce…     35 Unad…  -0.231   0.650    -1.51     1.04   0.723
##  4 btb        frpm_perce…     35 Cova…   0.0179  0.527    -1.01     1.05   0.973
##  5 avg_pct_m… frpm_perce…     35 Unad… -14.9    19.0     -52.1     22.3    0.432
##  6 avg_pct_m… frpm_perce…     35 Cova…  -5.15   17.4     -39.2     28.9    0.767
##  7 avg_pct_m… frpm_perce…     35 Unad… -18.5    20.4     -58.5     21.5    0.364
##  8 avg_pct_m… frpm_perce…     35 Cova…  -9.30   18.3     -45.1     26.5    0.610
##  9 avg_pct_n… frpm_perce…     35 Unad…  11.0    16.4     -21.2     43.2    0.502
## 10 avg_pct_n… frpm_perce…     35 Cova…   4.28   15.3     -25.7     34.3    0.780
## 11 avg_pct_n… frpm_perce…     35 Unad…  14.3    20.1     -25.2     53.8    0.478
## 12 avg_pct_n… frpm_perce…     35 Cova…   4.48   18.1     -31.0     40.0    0.805
## 13 avg_scale… frpm_perce…     35 Unad… -27.9    79.4    -184.     128.     0.725
## 14 avg_scale… frpm_perce…     35 Cova…  -9.07   82.7    -171.     153.     0.913
## 15 avg_scale… frpm_perce…     35 Unad… -35.1    64.0    -161.      90.3    0.583
## 16 avg_scale… frpm_perce…     35 Cova… -17.4    64.5    -144.     109.     0.788
## # ℹ 1 more variable: significant <lgl>
library(dplyr)
library(gpss)   # >=0.2.0
library(tibble)

# ---- settings ------------------------------------------------------------
outcomes <- c(
  "chronic_absenteeism",
  "btb",
  "avg_pct_met_above_ELA",
  "avg_pct_met_above_Math",
  "avg_pct_not_met_ELA",
  "avg_pct_not_met_Math",
  "avg_scale_score_ELA",
  "avg_scale_score_Math"
)
runvar   <- "frpm_percent"
cutoff   <- 75
local_bw <- 5
cont_covs <- c("pct_hispanic", "pct_white", "pct_two_or_more")

all_results <- list()

for (outcome in outcomes) {
  # prepare data for this outcome
  df <- df_clean %>%
    mutate(
      Charter = as.numeric(trimws(Charter.School) == "Yes"),
      DASS    = as.numeric(trimws(DASS) == "Yes")
    ) %>%
    filter(!is.na(.data[[outcome]]), !is.na(.data[[runvar]]))

  # split
  left  <- df %>% filter(.data[[runvar]] < cutoff)
  right <- df %>% filter(.data[[runvar]] >= cutoff)
  if (nrow(left) < 1 || nrow(right) < 1) {
    warning("Skipping ", outcome, ": insufficient data on one side of cutoff (left=", nrow(left), ", right=", nrow(right), ")")
    next
  }

  # --- unadjusted ----------------------------------------------------------
  fml_u <- reformulate(runvar, response = outcome)
  mod_L_u <- tryCatch(gpss(fml_u, data = left, optimize = TRUE), error = function(e) { warning("gpss error (unadj) for ", outcome, ": ", e$message); return(NULL) })
  mod_R_u <- tryCatch(gpss(fml_u, data = right, optimize = TRUE), error = function(e) { warning("gpss error (unadj) for ", outcome, ": ", e$message); return(NULL) })
  if (is.null(mod_L_u) || is.null(mod_R_u)) next

  profile_u <- tibble(!!runvar := cutoff)
  pred_L_u <- gp_predict(mod_L_u, profile_u)
  pred_R_u <- gp_predict(mod_R_u, profile_u)

  if (is.null(pred_L_u$Ys_mean_orig) || is.null(pred_R_u$Ys_mean_orig) ||
      is.null(pred_L_u$Ys_cov_orig)  || is.null(pred_R_u$Ys_cov_orig)) {
    warning("Skipping unadjusted for ", outcome, ": missing prediction components")
  } else {
    mu_L_u  <- drop(pred_L_u$Ys_mean_orig)
    mu_R_u  <- drop(pred_R_u$Ys_mean_orig)
    var_L_u <- drop(pred_L_u$Ys_cov_orig)
    var_R_u <- drop(pred_R_u$Ys_cov_orig)

    tau_u  <- as.numeric(mu_R_u - mu_L_u)
    se_u   <- sqrt(var_L_u + var_R_u)
    ci_u   <- tau_u + c(-1.96, 1.96) * se_u
    pval_u <- 2 * pnorm(-abs(tau_u / se_u))

    result_unadj <- tibble(
      outcome      = outcome,
      running_var  = runvar,
      cutoff       = cutoff,
      model        = "Unadjusted",
      tau          = tau_u,
      se           = se_u,
      ci_lower     = ci_u[1],
      ci_upper     = ci_u[2],
      p_value      = pval_u,
      significant  = pval_u < 0.05
    )
    all_results <- append(all_results, list(result_unadj))
  }

  # --- covariate-adjusted --------------------------------------------------
  df_adj <- df %>% filter(complete.cases(across(all_of(c(cont_covs, "Charter", "DASS")))))
  left_a  <- df_adj %>% filter(.data[[runvar]] < cutoff)
  right_a <- df_adj %>% filter(.data[[runvar]] >= cutoff)
  if (nrow(left_a) < 1 || nrow(right_a) < 1) {
    warning("Skipping covariate-adjusted for ", outcome, ": insufficient adjusted data (left=", nrow(left_a), ", right=", nrow(right_a), ")")
    next
  }

  predictors <- c(runvar, cont_covs, "Charter", "DASS")
  fml_a <- reformulate(predictors, response = outcome)
  mod_L_a <- tryCatch(gpss(fml_a, data = left_a, optimize = TRUE), error = function(e) { warning("gpss error (adj) for ", outcome, ": ", e$message); return(NULL) })
  mod_R_a <- tryCatch(gpss(fml_a, data = right_a, optimize = TRUE), error = function(e) { warning("gpss error (adj) for ", outcome, ": ", e$message); return(NULL) })
  if (is.null(mod_L_a) || is.null(mod_R_a)) next

  profile_a <- df_adj %>%
    filter(between(.data[[runvar]], cutoff - local_bw, cutoff + local_bw)) %>%
    summarise(
      across(all_of(cont_covs), ~ mean(.x, na.rm = TRUE)),
      Charter = mean(Charter, na.rm = TRUE),
      DASS = mean(DASS, na.rm = TRUE)
    )
  profile_a[[runvar]] <- cutoff
  profile_a <- profile_a %>% select(all_of(predictors))

  pred_L_a <- gp_predict(mod_L_a, profile_a)
  pred_R_a <- gp_predict(mod_R_a, profile_a)

  if (is.null(pred_L_a$Ys_mean_orig) || is.null(pred_R_a$Ys_mean_orig) ||
      is.null(pred_L_a$Ys_cov_orig)  || is.null(pred_R_a$Ys_cov_orig)) {
    warning("Skipping covariate-adjusted for ", outcome, ": missing prediction components")
    next
  }

  mu_L_a  <- drop(pred_L_a$Ys_mean_orig)
  mu_R_a  <- drop(pred_R_a$Ys_mean_orig)
  var_L_a <- drop(pred_L_a$Ys_cov_orig)
  var_R_a <- drop(pred_R_a$Ys_cov_orig)

  tau_a  <- as.numeric(mu_R_a - mu_L_a)
  se_a   <- sqrt(var_L_a + var_R_a)
  ci_a   <- tau_a + c(-1.96, 1.96) * se_a
  pval_a <- 2 * pnorm(-abs(tau_a / se_a))

  result_adj <- tibble(
    outcome      = outcome,
    running_var  = runvar,
    cutoff       = cutoff,
    model        = "Covariate-adjusted",
    tau          = tau_a,
    se           = se_a,
    ci_lower     = ci_a[1],
    ci_upper     = ci_a[2],
    p_value      = pval_a,
    significant  = pval_a < 0.05
  )
  all_results <- append(all_results, list(result_adj))
}

# combine everything
comparison <- bind_rows(all_results)
print(comparison)
## # A tibble: 16 × 10
##    outcome     running_var cutoff model     tau     se ci_lower ci_upper p_value
##    <chr>       <chr>        <dbl> <chr>   <dbl>  <dbl>    <dbl>    <dbl>   <dbl>
##  1 chronic_ab… frpm_perce…     75 Unad… -0.0740 19.0     -37.3     37.1    0.997
##  2 chronic_ab… frpm_perce…     75 Cova… -1.83   15.3     -31.8     28.1    0.905
##  3 btb         frpm_perce…     75 Unad… -0.159   0.693    -1.52     1.20   0.818
##  4 btb         frpm_perce…     75 Cova… -0.0461  0.554    -1.13     1.04   0.934
##  5 avg_pct_me… frpm_perce…     75 Unad…  1.10   19.4     -37.0     39.2    0.955
##  6 avg_pct_me… frpm_perce…     75 Cova…  3.17   19.8     -35.6     42.0    0.873
##  7 avg_pct_me… frpm_perce…     75 Unad… -0.325  20.3     -40.1     39.5    0.987
##  8 avg_pct_me… frpm_perce…     75 Cova…  3.85   21.0     -37.3     45.0    0.855
##  9 avg_pct_no… frpm_perce…     75 Unad… -2.40   18.2     -38.1     33.3    0.895
## 10 avg_pct_no… frpm_perce…     75 Cova… -5.08   17.6     -39.6     29.5    0.773
## 11 avg_pct_no… frpm_perce…     75 Unad… -3.59   22.7     -48.0     40.8    0.874
## 12 avg_pct_no… frpm_perce…     75 Cova… -8.99   21.5     -51.1     33.1    0.675
## 13 avg_scale_… frpm_perce…     75 Unad… -0.0527 88.5    -174.     173.     1.00 
## 14 avg_scale_… frpm_perce…     75 Cova… -5.86   93.8    -190.     178.     0.950
## 15 avg_scale_… frpm_perce…     75 Unad… -3.23   66.2    -133.     127.     0.961
## 16 avg_scale_… frpm_perce…     75 Cova… -2.06   69.6    -138.     134.     0.976
## # ℹ 1 more variable: significant <lgl>
library(dplyr)
library(gpss)   # >=0.2.0
library(tibble)

# ---- settings ------------------------------------------------------------
outcomes <- c(
  "chronic_absenteeism",
  "btb",
  "avg_pct_met_above_ELA",
  "avg_pct_met_above_Math",
  "avg_pct_not_met_ELA",
  "avg_pct_not_met_Math",
  "avg_scale_score_ELA",
  "avg_scale_score_Math"
)
runvar   <- "undup_pct"
cutoff   <- 40
local_bw <- 5
cont_covs <- c("pct_hispanic", "pct_white", "pct_two_or_more")

all_results <- list()

for (outcome in outcomes) {
  # prepare data for this outcome
  df <- df_clean %>%
    mutate(
      Charter = as.numeric(trimws(Charter.School) == "Yes"),
      DASS    = as.numeric(trimws(DASS) == "Yes")
    ) %>%
    filter(!is.na(.data[[outcome]]), !is.na(.data[[runvar]]))

  # split
  left  <- df %>% filter(.data[[runvar]] < cutoff)
  right <- df %>% filter(.data[[runvar]] >= cutoff)
  if (nrow(left) < 1 || nrow(right) < 1) {
    warning("Skipping ", outcome, ": insufficient data on one side of cutoff (left=", nrow(left), ", right=", nrow(right), ")")
    next
  }

  # --- unadjusted ----------------------------------------------------------
  fml_u <- reformulate(runvar, response = outcome)
  mod_L_u <- tryCatch(gpss(fml_u, data = left, optimize = TRUE), error = function(e) { warning("gpss error (unadj) for ", outcome, ": ", e$message); return(NULL) })
  mod_R_u <- tryCatch(gpss(fml_u, data = right, optimize = TRUE), error = function(e) { warning("gpss error (unadj) for ", outcome, ": ", e$message); return(NULL) })
  if (is.null(mod_L_u) || is.null(mod_R_u)) next

  profile_u <- tibble(!!runvar := cutoff)
  pred_L_u <- gp_predict(mod_L_u, profile_u)
  pred_R_u <- gp_predict(mod_R_u, profile_u)

  if (is.null(pred_L_u$Ys_mean_orig) || is.null(pred_R_u$Ys_mean_orig) ||
      is.null(pred_L_u$Ys_cov_orig)  || is.null(pred_R_u$Ys_cov_orig)) {
    warning("Skipping unadjusted for ", outcome, ": missing prediction components")
  } else {
    mu_L_u  <- drop(pred_L_u$Ys_mean_orig)
    mu_R_u  <- drop(pred_R_u$Ys_mean_orig)
    var_L_u <- drop(pred_L_u$Ys_cov_orig)
    var_R_u <- drop(pred_R_u$Ys_cov_orig)

    tau_u  <- as.numeric(mu_R_u - mu_L_u)
    se_u   <- sqrt(var_L_u + var_R_u)
    ci_u   <- tau_u + c(-1.96, 1.96) * se_u
    pval_u <- 2 * pnorm(-abs(tau_u / se_u))

    result_unadj <- tibble(
      outcome      = outcome,
      running_var  = runvar,
      cutoff       = cutoff,
      model        = "Unadjusted",
      tau          = tau_u,
      se           = se_u,
      ci_lower     = ci_u[1],
      ci_upper     = ci_u[2],
      p_value      = pval_u,
      significant  = pval_u < 0.05
    )
    all_results <- append(all_results, list(result_unadj))
  }

  # --- covariate-adjusted --------------------------------------------------
  df_adj <- df %>% filter(complete.cases(across(all_of(c(cont_covs, "Charter", "DASS")))))
  left_a  <- df_adj %>% filter(.data[[runvar]] < cutoff)
  right_a <- df_adj %>% filter(.data[[runvar]] >= cutoff)
  if (nrow(left_a) < 1 || nrow(right_a) < 1) {
    warning("Skipping covariate-adjusted for ", outcome, ": insufficient adjusted data (left=", nrow(left_a), ", right=", nrow(right_a), ")")
    next
  }

  predictors <- c(runvar, cont_covs, "Charter", "DASS")
  fml_a <- reformulate(predictors, response = outcome)
  mod_L_a <- tryCatch(gpss(fml_a, data = left_a, optimize = TRUE), error = function(e) { warning("gpss error (adj) for ", outcome, ": ", e$message); return(NULL) })
  mod_R_a <- tryCatch(gpss(fml_a, data = right_a, optimize = TRUE), error = function(e) { warning("gpss error (adj) for ", outcome, ": ", e$message); return(NULL) })
  if (is.null(mod_L_a) || is.null(mod_R_a)) next

  profile_a <- df_adj %>%
    filter(between(.data[[runvar]], cutoff - local_bw, cutoff + local_bw)) %>%
    summarise(
      across(all_of(cont_covs), ~ mean(.x, na.rm = TRUE)),
      Charter = mean(Charter, na.rm = TRUE),
      DASS = mean(DASS, na.rm = TRUE)
    )
  profile_a[[runvar]] <- cutoff
  profile_a <- profile_a %>% select(all_of(predictors))

  pred_L_a <- gp_predict(mod_L_a, profile_a)
  pred_R_a <- gp_predict(mod_R_a, profile_a)

  if (is.null(pred_L_a$Ys_mean_orig) || is.null(pred_R_a$Ys_mean_orig) ||
      is.null(pred_L_a$Ys_cov_orig)  || is.null(pred_R_a$Ys_cov_orig)) {
    warning("Skipping covariate-adjusted for ", outcome, ": missing prediction components")
    next
  }

  mu_L_a  <- drop(pred_L_a$Ys_mean_orig)
  mu_R_a  <- drop(pred_R_a$Ys_mean_orig)
  var_L_a <- drop(pred_L_a$Ys_cov_orig)
  var_R_a <- drop(pred_R_a$Ys_cov_orig)

  tau_a  <- as.numeric(mu_R_a - mu_L_a)
  se_a   <- sqrt(var_L_a + var_R_a)
  ci_a   <- tau_a + c(-1.96, 1.96) * se_a
  pval_a <- 2 * pnorm(-abs(tau_a / se_a))

  result_adj <- tibble(
    outcome      = outcome,
    running_var  = runvar,
    cutoff       = cutoff,
    model        = "Covariate-adjusted",
    tau          = tau_a,
    se           = se_a,
    ci_lower     = ci_a[1],
    ci_upper     = ci_a[2],
    p_value      = pval_a,
    significant  = pval_a < 0.05
  )
  all_results <- append(all_results, list(result_adj))
}

# combine everything
comparison <- bind_rows(all_results)
print(comparison)
## # A tibble: 16 × 10
##    outcome    running_var cutoff model      tau     se ci_lower ci_upper p_value
##    <chr>      <chr>        <dbl> <chr>    <dbl>  <dbl>    <dbl>    <dbl>   <dbl>
##  1 chronic_a… undup_pct       40 Unad…   1.43   18.1     -34.0    36.9     0.937
##  2 chronic_a… undup_pct       40 Cova…  -1.89   15.3     -31.9    28.1     0.902
##  3 btb        undup_pct       40 Unad…   0.276   0.679    -1.06    1.61    0.685
##  4 btb        undup_pct       40 Cova…  -0.0789  0.548    -1.15    0.995   0.885
##  5 avg_pct_m… undup_pct       40 Unad…  -7.41   19.7     -45.9    31.1     0.706
##  6 avg_pct_m… undup_pct       40 Cova…  -2.07   18.4     -38.1    34.0     0.911
##  7 avg_pct_m… undup_pct       40 Unad…  -6.34   20.9     -47.3    34.6     0.761
##  8 avg_pct_m… undup_pct       40 Cova…  -1.07   19.4     -39.2    37.0     0.956
##  9 avg_pct_n… undup_pct       40 Unad…   7.64   17.3     -26.3    41.5     0.659
## 10 avg_pct_n… undup_pct       40 Cova…   4.07   16.2     -27.8    35.9     0.802
## 11 avg_pct_n… undup_pct       40 Unad…   3.88   21.0     -37.2    45.0     0.853
## 12 avg_pct_n… undup_pct       40 Cova…   0.0787 19.4     -37.9    38.0     0.997
## 13 avg_scale… undup_pct       40 Unad… -37.0    83.5    -201.    127.      0.657
## 14 avg_scale… undup_pct       40 Cova… -30.0    84.7    -196.    136.      0.724
## 15 avg_scale… undup_pct       40 Unad… -30.2    66.1    -160.     99.3     0.647
## 16 avg_scale… undup_pct       40 Cova… -27.6    64.9    -155.     99.6     0.671
## # ℹ 1 more variable: significant <lgl>

extension of gp_rdd

library(dplyr)
library(tibble)

# ---- extended gp_rdd that accepts covariates --------------------------------
gp_rdd_cov <- function(X, Y, cut, covs = NULL,
                       alpha = 0.05,
                       b = NULL,
                       trim = FALSE,
                       trim_k_value = 0.1,
                       scale = TRUE,
                       local_bw = 5) {
  cutpoint_scalar <- cut

  # handle missingness
  if (!is.null(covs)) {
    complete_idx <- complete.cases(X, Y, covs)
    covs <- covs[complete_idx, , drop = FALSE]
  } else {
    complete_idx <- complete.cases(X, Y)
  }
  X <- as.numeric(X[complete_idx])
  Y <- as.numeric(Y[complete_idx])
  if (!is.null(covs)) covs <- covs[complete_idx, , drop = FALSE]

  # split left/right
  left_idx  <- which(X < cutpoint_scalar)
  right_idx <- which(X >= cutpoint_scalar)

  X_left  <- X[left_idx]
  X_right <- X[right_idx]
  Y_left  <- Y[left_idx]
  Y_right <- Y[right_idx]
  covs_left  <- if (!is.null(covs)) covs[left_idx, , drop = FALSE] else NULL
  covs_right <- if (!is.null(covs)) covs[right_idx, , drop = FALSE] else NULL

  # trimming logic
  if (isTRUE(trim) & isTRUE(scale)) {
    stop("If `trim==TRUE`, scale must be FALSE")
  }
  if (isTRUE(trim)) {
    b_left <- getb_maxvar(X_left)
    b_right <- getb_maxvar(X_right)

    trim_at_left  <- cutpoint_scalar - sqrt(-1 * b_left * log(trim_k_value))
    trim_at_right <- cutpoint_scalar + sqrt(-1 * b_right * log(trim_k_value))

    keep_left  <- X_left > trim_at_left
    keep_right <- X_right < trim_at_right

    X_left  <- X_left[keep_left]
    Y_left  <- Y_left[keep_left]
    if (!is.null(covs_left)) covs_left <- covs_left[keep_left, , drop = FALSE]

    X_right <- X_right[keep_right]
    Y_right <- Y_right[keep_right]
    if (!is.null(covs_right)) covs_right <- covs_right[keep_right, , drop = FALSE]
  } else {
    b_left  <- b
    b_right <- b
  }

  # construct training design matrices
  build_X_mat <- function(x_vec, cov_df) {
    if (is.null(cov_df)) {
      matrix(x_vec, ncol = 1, dimnames = list(NULL, "X"))
    } else {
      as.matrix(cbind(X = x_vec, cov_df))
    }
  }
  Xmat_left  <- build_X_mat(X_left, covs_left)
  Xmat_right <- build_X_mat(X_right, covs_right)

  # fit GPs (optimize=TRUE to mirror original)
  gp_train_l <- gp_train(X = Xmat_left, Y = Y_left, b = b_left, scale = scale, optimize = TRUE)
  gp_train_r <- gp_train(X = Xmat_right, Y = Y_right, b = b_right, scale = scale, optimize = TRUE)

  # build prediction row matching training design
  if (!is.null(covs)) {
    window_idx <- which(X >= (cutpoint_scalar - local_bw) & X <= (cutpoint_scalar + local_bw))
    if (length(window_idx) == 0) {
      warning("No observations in local_bw window for covariate profile; using global means instead.")
      cov_profile <- colMeans(covs, na.rm = TRUE)
    } else {
      cov_profile <- colMeans(covs[window_idx, , drop = FALSE], na.rm = TRUE)
    }
    pred_df <- as.data.frame(t(c(cutpoint_scalar, cov_profile)))
    colnames(pred_df) <- colnames(Xmat_left)  # assumes same structure left/right
  } else {
    pred_df <- as.data.frame(t(c(cutpoint_scalar)))
    colnames(pred_df) <- colnames(Xmat_left)  # should be "X"
    cov_profile <- NULL
  }

  # predictions
  gp_pred_l <- gp_predict(gp_train_l, pred_df)
  gp_pred_r <- gp_predict(gp_train_r, pred_df)

  # treatment effect and uncertainty
  pred_l <- gp_pred_l$Ys_mean_orig[1]
  pred_r <- gp_pred_r$Ys_mean_orig[1]
  tau <- pred_r - pred_l
  se <- sqrt(diag(gp_pred_l$f_cov_orig)[1] + diag(gp_pred_r$f_cov_orig)[1])
  ci <- c(lower = tau - qnorm(1 - alpha / 2) * se,
          upper = tau + qnorm(1 - alpha / 2) * se)

  results <- list(
    tau = tau, se = se, ci = ci,
    pred_l = pred_l, pred_r = pred_r,
    b_left = gp_train_l$b, b_right = gp_train_r$b,
    s2_left = gp_train_l$s2, s2_right = gp_train_r$s2,
    n_left = nrow(Xmat_left), n_right = nrow(Xmat_right),
    trim = trim,
    covariate_profile = cov_profile,
    X = X, Y = Y,
    gp_train_l = gp_train_l,
    gp_train_r = gp_train_r,
    cut = cutpoint_scalar
  )

  if (isTRUE(trim)) {
    results <- append(results, c(trim_at_left = trim_at_left, trim_at_right = trim_at_right))
  }
  return(results)
}

# ---- prepare covariates --------------------------------------------------
covariates <- df_clean %>%
  mutate(
    Charter = as.numeric(trimws(Charter.School) == "Yes"),
    DASS    = as.numeric(trimws(DASS) == "Yes")
  ) %>%
  select(Charter, DASS, pct_hispanic, pct_white, pct_two_or_more)
library(dplyr)
library(tibble)

# ---- fixed gp_rdd_cov that handles complete.cases correctly ----------
gp_rdd_cov <- function(X, Y, cut, covs = NULL,
                       alpha = 0.05,
                       b = NULL,
                       trim = FALSE,
                       trim_k_value = 0.1,
                       scale = TRUE,
                       local_bw = 5) {
  cutpoint_scalar <- cut

  # proper missingness handling: align X, Y, and covs
  if (!is.null(covs)) {
    temp_all <- as.data.frame(cbind(X = X, Y = Y, covs))
    complete_idx <- complete.cases(temp_all)
    X <- as.numeric(X[complete_idx])
    Y <- as.numeric(Y[complete_idx])
    covs <- covs[complete_idx, , drop = FALSE]
  } else {
    complete_idx <- complete.cases(X, Y)
    X <- as.numeric(X[complete_idx])
    Y <- as.numeric(Y[complete_idx])
  }

  # split left/right
  left_idx  <- which(X < cutpoint_scalar)
  right_idx <- which(X >= cutpoint_scalar)

  X_left  <- X[left_idx]
  X_right <- X[right_idx]
  Y_left  <- Y[left_idx]
  Y_right <- Y[right_idx]
  covs_left  <- if (!is.null(covs)) covs[left_idx, , drop = FALSE] else NULL
  covs_right <- if (!is.null(covs)) covs[right_idx, , drop = FALSE] else NULL

  # trimming logic
  if (isTRUE(trim) & isTRUE(scale)) {
    stop("If `trim==TRUE`, scale must be FALSE")
  }
  if (isTRUE(trim)) {
    b_left <- getb_maxvar(X_left)
    b_right <- getb_maxvar(X_right)

    trim_at_left  <- cutpoint_scalar - sqrt(-1 * b_left * log(trim_k_value))
    trim_at_right <- cutpoint_scalar + sqrt(-1 * b_right * log(trim_k_value))

    keep_left  <- X_left > trim_at_left
    keep_right <- X_right < trim_at_right

    X_left  <- X_left[keep_left]
    Y_left  <- Y_left[keep_left]
    if (!is.null(covs_left)) covs_left <- covs_left[keep_left, , drop = FALSE]

    X_right <- X_right[keep_right]
    Y_right <- Y_right[keep_right]
    if (!is.null(covs_right)) covs_right <- covs_right[keep_right, , drop = FALSE]
  } else {
    b_left  <- b
    b_right <- b
  }

  # construct design matrices
  build_X_mat <- function(x_vec, cov_df) {
    if (is.null(cov_df)) {
      matrix(x_vec, ncol = 1, dimnames = list(NULL, "X"))
    } else {
      as.matrix(cbind(X = x_vec, cov_df))
    }
  }
  Xmat_left  <- build_X_mat(X_left, covs_left)
  Xmat_right <- build_X_mat(X_right, covs_right)

  # fit GPs
  gp_train_l <- gp_train(X = Xmat_left, Y = Y_left, b = b_left, scale = scale, optimize = TRUE)
  gp_train_r <- gp_train(X = Xmat_right, Y = Y_right, b = b_right, scale = scale, optimize = TRUE)

  # build prediction row matching training design
  if (!is.null(covs)) {
    window_idx <- which(X >= (cutpoint_scalar - local_bw) & X <= (cutpoint_scalar + local_bw))
    if (length(window_idx) == 0) {
      warning("No observations in local_bw window for covariate profile; using global means instead.")
      cov_profile <- colMeans(covs, na.rm = TRUE)
    } else {
      cov_profile <- colMeans(covs[window_idx, , drop = FALSE], na.rm = TRUE)
    }
    pred_df <- as.data.frame(t(c(cutpoint_scalar, cov_profile)))
    colnames(pred_df) <- colnames(Xmat_left)
  } else {
    pred_df <- as.data.frame(t(c(cutpoint_scalar)))
    colnames(pred_df) <- colnames(Xmat_left)
    cov_profile <- NULL
  }

  # predictions
  gp_pred_l <- gp_predict(gp_train_l, pred_df)
  gp_pred_r <- gp_predict(gp_train_r, pred_df)

  # effect and uncertainty
  pred_l <- gp_pred_l$Ys_mean_orig[1]
  pred_r <- gp_pred_r$Ys_mean_orig[1]
  tau <- pred_r - pred_l
  se <- sqrt(diag(gp_pred_l$f_cov_orig)[1] + diag(gp_pred_r$f_cov_orig)[1])
  ci <- c(lower = tau - qnorm(1 - alpha / 2) * se,
          upper = tau + qnorm(1 - alpha / 2) * se)

  results <- list(
    tau = tau, se = se, ci = ci,
    pred_l = pred_l, pred_r = pred_r,
    b_left = gp_train_l$b, b_right = gp_train_r$b,
    s2_left = gp_train_l$s2, s2_right = gp_train_r$s2,
    n_left = nrow(Xmat_left), n_right = nrow(Xmat_right),
    trim = trim,
    covariate_profile = cov_profile,
    X = X, Y = Y,
    gp_train_l = gp_train_l,
    gp_train_r = gp_train_r,
    cut = cutpoint_scalar
  )

  if (isTRUE(trim)) {
    results <- append(results, c(trim_at_left = trim_at_left, trim_at_right = trim_at_right))
  }
  return(results)
}

# ---- templated runner --------------------------------------------------
run_rdd_comparisons <- function(df, running_variable, cutoff, outcomes, covariate_names = c("Charter", "DASS", "pct_hispanic", "pct_white", "pct_two_or_more"), local_bw = 5) {
  # prepare covariates once
  covs_df <- df %>%
    mutate(
      Charter = as.numeric(trimws(Charter.School) == "Yes"),
      DASS    = as.numeric(trimws(DASS) == "Yes")
    ) %>%
    select(all_of(covariate_names))

  results <- vector("list", length(outcomes) * 3)
  i <- 1L

  for (outcome in outcomes) {
    X <- df[[running_variable]]
    Y <- df[[outcome]]

    # 1. gp_rdd unadjusted
    rdd_res <- gp_rdd(X, Y, cutoff, trim = FALSE, scale = TRUE)
    tau_rdd <- rdd_res$tau
    se_rdd  <- rdd_res$se
    ci_lo_rdd <- rdd_res$ci[1]
    ci_hi_rdd <- rdd_res$ci[2]
    pval_rdd <- 2 * pnorm(-abs(tau_rdd / se_rdd))
    sig_rdd  <- pval_rdd < 0.05

    results[[i]] <- tibble(
      outcome     = outcome,
      running_variable = running_variable,
      cutoff      = cutoff,
      model       = "gp_rdd",
      tau         = tau_rdd,
      se          = se_rdd,
      ci_lower    = ci_lo_rdd,
      ci_upper    = ci_hi_rdd,
      p_value     = pval_rdd,
      significant = sig_rdd
    )
    i <- i + 1L

    # 2. gp_rdd_cov unadjusted
    rdd_cov_unadj <- gp_rdd_cov(X = X, Y = Y, cut = cutoff, covs = NULL, trim = FALSE, scale = TRUE, local_bw = local_bw)
    tau_cov_unadj <- rdd_cov_unadj$tau
    se_cov_unadj  <- rdd_cov_unadj$se
    ci_lo_cov_unadj <- rdd_cov_unadj$ci["lower"]
    ci_hi_cov_unadj <- rdd_cov_unadj$ci["upper"]
    pval_cov_unadj <- 2 * pnorm(-abs(tau_cov_unadj / se_cov_unadj))
    sig_cov_unadj  <- pval_cov_unadj < 0.05

    results[[i]] <- tibble(
      outcome     = outcome,
      running_variable = running_variable,
      cutoff      = cutoff,
      model       = "gp_rdd_cov (no covs)",
      tau         = tau_cov_unadj,
      se          = se_cov_unadj,
      ci_lower    = ci_lo_cov_unadj,
      ci_upper    = ci_hi_cov_unadj,
      p_value     = pval_cov_unadj,
      significant = sig_cov_unadj
    )
    i <- i + 1L

    # 3. gp_rdd_cov with covariate adjustment
    rdd_cov_adj <- gp_rdd_cov(X = X, Y = Y, cut = cutoff, covs = covs_df, trim = FALSE, scale = TRUE, local_bw = local_bw)
    tau_cov_adj <- rdd_cov_adj$tau
    se_cov_adj  <- rdd_cov_adj$se
    ci_lo_cov_adj <- rdd_cov_adj$ci["lower"]
    ci_hi_cov_adj <- rdd_cov_adj$ci["upper"]
    pval_cov_adj <- 2 * pnorm(-abs(tau_cov_adj / se_cov_adj))
    sig_cov_adj  <- pval_cov_adj < 0.05

    results[[i]] <- tibble(
      outcome     = outcome,
      running_variable = running_variable,
      cutoff      = cutoff,
      model       = "gp_rdd_cov (covariate-adjusted)",
      tau         = tau_cov_adj,
      se          = se_cov_adj,
      ci_lower    = ci_lo_cov_adj,
      ci_upper    = ci_hi_cov_adj,
      p_value     = pval_cov_adj,
      significant = sig_cov_adj
    )
    i <- i + 1L
  }

  bind_rows(results)
}

FRPM 35%

# ---- Usage FRPM 35%  ------------------------------------------------------
outcomes <- c(
  "chronic_absenteeism",
  "btb",
  "avg_pct_met_above_ELA",
  "avg_pct_met_above_Math",
  "avg_pct_not_met_ELA",
  "avg_pct_not_met_Math",
  "avg_scale_score_ELA",
  "avg_scale_score_Math"
)
frpm_gp_rdd_35_comparison_table <- run_rdd_comparisons(
  df = df_clean,
  running_variable = "frpm_percent",
  cutoff = 35,
  outcomes = outcomes,
  covariate_names = c("Charter", "DASS", "pct_hispanic", "pct_white", "pct_two_or_more"),
  local_bw = 5
)

print(frpm_gp_rdd_35_comparison_table)
## # A tibble: 24 × 10
##    outcome        running_variable cutoff model      tau    se ci_lower ci_upper
##    <chr>          <chr>             <dbl> <chr>    <dbl> <dbl>    <dbl>    <dbl>
##  1 chronic_absen… frpm_percent         35 gp_r…   7.87   5.74    -3.38    19.1  
##  2 chronic_absen… frpm_percent         35 gp_r…   7.87   5.74    -3.38    19.1  
##  3 chronic_absen… frpm_percent         35 gp_r…   4.85   8.40   -11.6     21.3  
##  4 btb            frpm_percent         35 gp_r…  -0.231  0.224   -0.669    0.208
##  5 btb            frpm_percent         35 gp_r…  -0.231  0.224   -0.669    0.208
##  6 btb            frpm_percent         35 gp_r…   0.0179 0.299   -0.568    0.604
##  7 avg_pct_met_a… frpm_percent         35 gp_r… -14.9    6.74   -28.1     -1.69 
##  8 avg_pct_met_a… frpm_percent         35 gp_r… -14.9    6.74   -28.1     -1.69 
##  9 avg_pct_met_a… frpm_percent         35 gp_r…  -5.15   9.86   -24.5     14.2  
## 10 avg_pct_met_a… frpm_percent         35 gp_r… -18.5    7.43   -33.1     -3.96 
## # ℹ 14 more rows
## # ℹ 2 more variables: p_value <dbl>, significant <lgl>

FRPM 75%

# ---- Usage FRPM 35%  ------------------------------------------------------
outcomes <- c(
  "chronic_absenteeism",
  "btb",
  "avg_pct_met_above_ELA",
  "avg_pct_met_above_Math",
  "avg_pct_not_met_ELA",
  "avg_pct_not_met_Math",
  "avg_scale_score_ELA",
  "avg_scale_score_Math"
)

frpm_gp_rdd_75_comparison_table <- run_rdd_comparisons(
  df = df_clean,
  running_variable = "frpm_percent",
  cutoff = 75,
  outcomes = outcomes,
  covariate_names = c("Charter", "DASS", "pct_hispanic", "pct_white", "pct_two_or_more"),
  local_bw = 5
)

print(frpm_gp_rdd_75_comparison_table)
## # A tibble: 24 × 10
##    outcome        running_variable cutoff model     tau     se ci_lower ci_upper
##    <chr>          <chr>             <dbl> <chr>   <dbl>  <dbl>    <dbl>    <dbl>
##  1 chronic_absen… frpm_percent         75 gp_r… -0.0740  6.06   -12.0     11.8  
##  2 chronic_absen… frpm_percent         75 gp_r… -0.0740  6.06   -12.0     11.8  
##  3 chronic_absen… frpm_percent         75 gp_r… -1.83    8.81   -19.1     15.4  
##  4 btb            frpm_percent         75 gp_r… -0.159   0.208   -0.567    0.249
##  5 btb            frpm_percent         75 gp_r… -0.159   0.208   -0.567    0.249
##  6 btb            frpm_percent         75 gp_r… -0.0461  0.303   -0.639    0.547
##  7 avg_pct_met_a… frpm_percent         75 gp_r…  1.10    6.08   -10.8     13.0  
##  8 avg_pct_met_a… frpm_percent         75 gp_r…  1.10    6.08   -10.8     13.0  
##  9 avg_pct_met_a… frpm_percent         75 gp_r…  3.17   11.6    -19.6     26.0  
## 10 avg_pct_met_a… frpm_percent         75 gp_r… -0.325   5.97   -12.0     11.4  
## # ℹ 14 more rows
## # ℹ 2 more variables: p_value <dbl>, significant <lgl>

Unduplicate Peer Percent 40%

# ---- Usage FRPM 35%  ------------------------------------------------------
outcomes <- c(
  "chronic_absenteeism",
  "btb",
  "avg_pct_met_above_ELA",
  "avg_pct_met_above_Math",
  "avg_pct_not_met_ELA",
  "avg_pct_not_met_Math",
  "avg_scale_score_ELA",
  "avg_scale_score_Math"
)

upp_gp_rdd_40_comparison_table <- run_rdd_comparisons(
  df = df_clean,
  running_variable = "undup_pct",
  cutoff = 40,
  outcomes = outcomes,
  covariate_names = c("Charter", "DASS", "pct_hispanic", "pct_white", "pct_two_or_more"),
  local_bw = 5
)

print(upp_gp_rdd_40_comparison_table)
## # A tibble: 24 × 10
##    outcome        running_variable cutoff model     tau     se ci_lower ci_upper
##    <chr>          <chr>             <dbl> <chr>   <dbl>  <dbl>    <dbl>    <dbl>
##  1 chronic_absen… undup_pct            40 gp_r…  1.43    6.90   -12.1     15.0  
##  2 chronic_absen… undup_pct            40 gp_r…  1.43    6.90   -12.1     15.0  
##  3 chronic_absen… undup_pct            40 gp_r… -1.89    9.64   -20.8     17.0  
##  4 btb            undup_pct            40 gp_r…  0.276   0.247   -0.208    0.759
##  5 btb            undup_pct            40 gp_r…  0.276   0.247   -0.208    0.759
##  6 btb            undup_pct            40 gp_r… -0.0789  0.326   -0.717    0.559
##  7 avg_pct_met_a… undup_pct            40 gp_r… -7.41    7.36   -21.8      7.01 
##  8 avg_pct_met_a… undup_pct            40 gp_r… -7.41    7.36   -21.8      7.01 
##  9 avg_pct_met_a… undup_pct            40 gp_r… -2.07   10.8    -23.2     19.0  
## 10 avg_pct_met_a… undup_pct            40 gp_r… -6.34    7.60   -21.2      8.55 
## # ℹ 14 more rows
## # ℹ 2 more variables: p_value <dbl>, significant <lgl>

Unduplicate Peer Percent 75%

# ---- Usage Unduplicate Peer Percent 75%  ------------------------------------------------------
outcomes <- c(
  "chronic_absenteeism",
  "btb",
  "avg_pct_met_above_ELA",
  "avg_pct_met_above_Math",
  "avg_pct_not_met_ELA",
  "avg_pct_not_met_Math",
  "avg_scale_score_ELA",
  "avg_scale_score_Math"
)

upp_gp_rdd_75_comparison_table <- run_rdd_comparisons(
  df = df_clean,
  running_variable = "undup_pct",
  cutoff = 75,
  outcomes = outcomes,
  covariate_names = c("Charter", "DASS", "pct_hispanic", "pct_white", "pct_two_or_more"),
  local_bw = 5
)

print(upp_gp_rdd_75_comparison_table)
## # A tibble: 24 × 10
##    outcome running_variable cutoff model     tau    se ci_lower ci_upper p_value
##    <chr>   <chr>             <dbl> <chr>   <dbl> <dbl>    <dbl>    <dbl>   <dbl>
##  1 chroni… undup_pct            75 gp_r… -1.67   6.68   -14.8     11.4     0.803
##  2 chroni… undup_pct            75 gp_r… -1.67   6.68   -14.8     11.4     0.803
##  3 chroni… undup_pct            75 gp_r… -4.72   6.93   -18.3      8.86    0.496
##  4 btb     undup_pct            75 gp_r…  0.0944 0.225   -0.347    0.535   0.675
##  5 btb     undup_pct            75 gp_r…  0.0944 0.225   -0.347    0.535   0.675
##  6 btb     undup_pct            75 gp_r…  0.227  0.235   -0.233    0.687   0.333
##  7 avg_pc… undup_pct            75 gp_r… -5.28   6.62   -18.3      7.70    0.425
##  8 avg_pc… undup_pct            75 gp_r… -5.28   6.62   -18.3      7.70    0.425
##  9 avg_pc… undup_pct            75 gp_r… -3.67   7.76   -18.9     11.5     0.636
## 10 avg_pc… undup_pct            75 gp_r… -1.11   6.38   -13.6     11.4     0.862
## # ℹ 14 more rows
## # ℹ 1 more variable: significant <lgl>

Result comparison

classical_rdd_summary_tbl <- classical_rdd_summary_tbl %>% mutate(model="rdd", significant=p_value<0.05)

complete_results_table_summary <- classical_rdd_summary_tbl %>%
                                    rbind(frpm_gp_rdd_35_comparison_table) %>%
                                    rbind(frpm_gp_rdd_75_comparison_table) %>%
                                    rbind(upp_gp_rdd_40_comparison_table)  %>%
                                    rbind(upp_gp_rdd_75_comparison_table) %>%
                                    arrange(outcome, running_variable, cutoff, desc(model))
complete_results_table_summary
## # A tibble: 128 × 10
##    outcome  running_variable cutoff    tau    se ci_lower ci_upper p_value model
##    <chr>    <chr>             <dbl>  <dbl> <dbl>    <dbl>    <dbl>   <dbl> <chr>
##  1 avg_pct… frpm_percent         35 -12.9   6.00   -24.6     -1.09  0.0323 rdd  
##  2 avg_pct… frpm_percent         35 -14.9   6.74   -28.1     -1.69  0.0271 gp_r…
##  3 avg_pct… frpm_percent         35  -5.15  9.86   -24.5     14.2   0.601  gp_r…
##  4 avg_pct… frpm_percent         35 -14.9   6.74   -28.1     -1.69  0.0271 gp_r…
##  5 avg_pct… frpm_percent         75   8.10  4.95    -1.61    17.8   0.102  rdd  
##  6 avg_pct… frpm_percent         75   1.10  6.08   -10.8     13.0   0.856  gp_r…
##  7 avg_pct… frpm_percent         75   3.17 11.6    -19.6     26.0   0.785  gp_r…
##  8 avg_pct… frpm_percent         75   1.10  6.08   -10.8     13.0   0.856  gp_r…
##  9 avg_pct… undup_pct            40  -7.41  7.36   -21.8      7.01  0.314  gp_r…
## 10 avg_pct… undup_pct            40  -2.07 10.8    -23.2     19.0   0.848  gp_r…
## # ℹ 118 more rows
## # ℹ 1 more variable: significant <lgl>
view(complete_results_table_summary)
# 2 RVs
# 2 cutoffs
# 8 outcomes
# 4 models
# 128 total rows